library(data.table)
library(ggplot2)
library(cowplot)
library(readxl)
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.7 ✔ dplyr 1.0.7
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(webchem)
library(vegan)## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
library(fastcluster)##
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
##
## hclust
library(RColorBrewer)
library(ggpubr)##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
library(ComplexHeatmap)## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.13.1
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggrepel)
library(patchwork)##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
library(KEGGREST)
theme_set(theme_classic2())
datadir <- ""
outdir <- "figure1/"
dir.create(outdir)
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] KEGGREST_1.32.0 patchwork_1.1.1 ggrepel_0.9.1
## [4] ComplexHeatmap_2.13.1 ggpubr_0.4.0 RColorBrewer_1.1-2
## [7] fastcluster_1.2.3 vegan_2.6-2 lattice_0.20-45
## [10] permute_0.9-5 webchem_1.1.2 forcats_0.5.1
## [13] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
## [16] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [19] tidyverse_1.3.1 readxl_1.3.1 cowplot_1.1.1
## [22] ggplot2_3.3.6 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.34.0
## [7] GlobalOptions_0.1.2 fs_1.5.0 clue_0.3-61
## [10] rstudioapi_0.13 fansi_1.0.3 lubridate_1.8.0
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.1.1
## [16] doParallel_1.0.17 knitr_1.39 jsonlite_1.8.0
## [19] broom_0.8.0 cluster_2.1.3 dbplyr_2.1.1
## [22] png_0.1-7 data.tree_1.0.0 compiler_4.1.1
## [25] httr_1.4.2 backports_1.2.1 assertthat_0.2.1
## [28] Matrix_1.4-1 fastmap_1.1.0 cli_3.3.0
## [31] htmltools_0.5.2 tools_4.1.1 gtable_0.3.0
## [34] glue_1.6.2 GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3
## [37] carData_3.0-5 cellranger_1.1.0 jquerylib_0.1.4
## [40] vctrs_0.4.1 Biostrings_2.62.0 nlme_3.1-159
## [43] iterators_1.0.13 xfun_0.31 rvest_1.0.2
## [46] lifecycle_1.0.0 rstatix_0.7.0 zlibbioc_1.40.0
## [49] MASS_7.3-57 scales_1.1.1 hms_1.1.1
## [52] parallel_4.1.1 yaml_2.3.5 sass_0.4.1
## [55] stringi_1.7.6 S4Vectors_0.32.4 foreach_1.5.2
## [58] BiocGenerics_0.40.0 shape_1.4.6 GenomeInfoDb_1.30.1
## [61] rlang_1.0.2 pkgconfig_2.0.3 matrixStats_0.62.0
## [64] bitops_1.0-7 evaluate_0.15 tidyselect_1.1.2
## [67] magrittr_2.0.3 R6_2.5.1 IRanges_2.28.0
## [70] generics_0.1.0 DBI_1.1.1 pillar_1.7.0
## [73] haven_2.5.0 withr_2.5.0 mgcv_1.8-40
## [76] abind_1.4-5 RCurl_1.98-1.7 modelr_0.1.8
## [79] crayon_1.4.1 car_3.0-13 utf8_1.2.2
## [82] tzdb_0.3.0 rmarkdown_2.14 GetoptLong_1.0.5
## [85] reprex_2.0.1 digest_0.6.29 stats4_4.1.1
## [88] munsell_0.5.0 bslib_0.3.1
####### Read in processed datasets
processed_data <- fread("processedDatasets/timecourse_all_data.csv")
summary_data <- fread("processedDatasets/timecourse_mean_summaries.csv")
#growth_file<-paste0(datadir, "TimeCourse_take2_2020-8-17/ODdata.xlsx")
#### Add in OD/growth data
growth_file <- "../TimeCourse_take2_2020-8-17/ODdata.xlsx"
growth_data <- read_xlsx(growth_file, sheet = 1)
time_pts <- read_xlsx(growth_file, sheet = 2)
time_pts <- data.table(time_pts %>% mutate(TimePoint = as.numeric(gsub("TP", "", TimePoint))))
growth_data <- data.table(growth_data) %>% melt(id.var = c("Strain", "AcConc", "Replicate"))
growth_data[,Time:=as.numeric(gsub(".*_", "", variable))]
growth2 <- growth_data
setnames(growth2, "value", "OD600")
growth2[Time==41.5, Time:=42]
growth2[Time==26.5, Time:=26]
growth2[Time==30.5, Time:=30]
mean_growth2 <- growth2[, .(meanOD =mean(OD600), sdOD = sd(OD600)), by=list(Strain, AcConc, Time)]
############# Growth data plots
growth_data_sub <- growth2[Strain == "2243" & AcConc==1, list(mean(OD600), sd(OD600)), by = Time]
growth_data_sub <- rbind(growth_data_sub, data.table(Time = 0, V1 = 0.01), fill = T)
growth2_sub <- growth2[Strain == "2243" & AcConc==1]
growth2_sub <- rbind(growth2_sub, data.table(Time = rep(0, 3), OD600 = rep(0.01, 3), Replicate = 1:3), fill = T)
growth_plot_heatmap <- ggplot(growth2_sub[,mean(OD600), by=Time], aes(x = factor(Time), y = 1, fill = V1)) + geom_tile(color = "black") + xlab("Time (hr)") +
theme_minimal() + scale_fill_gradient(low = "white", high = "gray20", name = "E. lenta DSM 2243 abundance\n(mean normalized OD600)") + theme(axis.text.y = element_blank(),
axis.title.y = element_blank())
ggsave(growth_plot_heatmap, file = paste0(outdir, "growth_plot_timecourse_heatmap.pdf"), width = 5, height = 1.2)
growth_plot_heatmapsummary_data[TimePoint==6 & AcConc==1 & Strain == "2243" , table(CF_superclass)]## CF_superclass
##
## 2
## Alkaloids and derivatives
## 6
## Lignans, neolignans and related compounds
## 3
## Lipids and lipid-like molecules
## 1
## no matches
## 3874
## Nucleosides, nucleotides, and analogues
## 43
## Organic acids and derivatives
## 221
## Organic nitrogen compounds
## 2
## Organic oxygen compounds
## 4
## Organoheterocyclic compounds
## 91
## Phenylpropanoids and polyketides
## 2
summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" , table(MSI)]## MSI
## 1 1,4 2 2,4 3 4
## 4090 67 4 35 2 42 9
#### Add Other bucket to superclass annotation
class_counts <- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243", length(unique(IDUniq)), by = CF_superclass][order(V1, decreasing = T)]
summary_data[,CF_superclass2:=ifelse(CF_superclass %in% class_counts[V1 < 5, CF_superclass], "Other", CF_superclass)]
#### Volcano plot
volcano_plot_gnps <- ggplot(summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & CF_superclass == "no matches"], aes(x = log2FC_adj, y = -1*log10(p.value))) +
geom_vline(xintercept = 1, linetype = 2) + geom_vline(xintercept = -1, linetype = 2) +
geom_hline(yintercept = 0.6, linetype = 2) +
geom_point(shape = 21, fill = "gray30", alpha = 0.8, size= 1.2) + geom_point(data = summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & CF_superclass != "no matches"], aes(fill = CF_superclass2), shape = 21, size = 1.8) +
scale_fill_manual(values = c(brewer.pal(12, "Set3")[c(1:2,4:8, 10:12)]), name = "Superclass") +
theme(legend.position = "bottom") + guides(fill = guide_legend(ncol = 1)) +
xlab("log2 fold change E. lenta DSM 2243 \nvs. sterile controls at 50hr")
ggsave(volcano_plot_gnps, file = paste0(outdir, "volcano_plot_gnps.pdf"), width = 4, height = 5)
volcano_plot_gnps#### Summary stats
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(FDRCorrect < 0.1 )]##
## FALSE TRUE
## 3483 612
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1, table(log2FC > 0)]##
## FALSE TRUE
## 168 444
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1 & log2FC < 0 & MSI != ""]## IonMode AlignmentID IDUniq Strain AcConc TimePoint
## 1: Pos 1069 146.09215_7.621_1069_Pos 2243 1 6
## 2: Pos 1158 152.05757_6.515_1158_Pos 2243 1 6
## 3: Pos 1543 175.11842_9.472_1543_Pos 2243 1 6
## 4: Pos 2624 272.17151_9.39_2624_Pos 2243 1 6
## 5: Pos 2635 274.18716_9.265_2635_Pos 2243 1 6
## 6: Pos 2755 293.09756_9.69_2755_Pos 2243 1 6
## 7: Pos 2898 322.18713_8.665_2898_Pos 2243 1 6
## 8: Pos 3122 377.14465_6.11_3122_Pos 2243 1 6
## AvgMZ AvgRt Adduct MSI MetName Formula
## 1: 146.0922 7.621 [M+H]+ 1 4-Guanidinobutyric acid null
## 2: 152.0576 6.515 [M+H]+ 1 Guanine null
## 3: 175.1184 9.472 [M+H]+ 1 Arginine null
## 4: 272.1715 9.390 [M+H]+ 3 Pro-Arg C11H21N5O3
## 5: 274.1872 9.265 [M+H]+ 3 Val-Arg C11H23N5O3
## 6: 293.0976 9.690 [M+H]+ 3 Ethylenediaminetetraacetic acid C10H16N2O8
## 7: 322.1871 8.665 [M+H]+ 3 Arg-Phe C15H23N5O3
## 8: 377.1447 6.110 [M+H]+ 1 Riboflavin null
## INCHIKEY
## 1: TUHVEAJXIMEOSA-UHFFFAOYSA-N
## 2: UYTPUPDQBNUYGX-UHFFFAOYSA-N
## 3: ODKSFYDXXFIFQN-BYPYZUCNSA-N
## 4: HMNSRTLZAJHSIK-YUMQZZPRSA-N
## 5: IBIDRSSEHFLGSD-YUMQZZPRSA-N
## 6: KCXVZYZYPLLWCC-UHFFFAOYSA-N
## 7: PQBHGSGQZSOLIR-RYUDHWBXSA-N
## 8: AUNGANRZJHBGPY-SCRDCRAPSA-N
## MS/MS spectrum
## 1: 58.06606:72922 60.05321:66028 60.0569:599196 64.89349:41553 69.03465:55517 86.05511:114973 86.06149:1571713 87.03905:158058 87.04552:2257787 88.04843:97862 100.02277:85739 100.11342:78052 104.07082:613889 111.05553:232843 128.08371:426668 129.06721:102166 131.96037:44959 146.09225:3108444
## 2: 51.32462:2113 56.05073:3474 58.06278:4488 58.06643:23230 59.07427:28091 64.85785:2362 89.08133:2599 106.04996:4090 110.06185:3784 110.07248:3325 124.07721:3926 128.04654:4485 134.06007:20088 135.03186:5714 135.39624:2446 136.07767:3755 151.06358:7428 151.08888:10206 151.12454:3074 152.03471:5793 152.04799:61400 152.05885:237983 152.08395:5470 152.09494:7853 152.10806:4838 152.12015:4354
## 3: 60.05695:63174348 70.06654:80183040 71.06964:1397201 72.08225:5456357 75.7781:1277058 97.0774:1548582 112.08871:6907542 114.10384:1522407 115.08826:1615821 116.07243:63837528 117.07515:1738751 130.09753:24050896 131.1311:970662 153.58105:837189 157.10864:4184034 158.09486:21616900 159.09869:914629 175.09473:2739370 175.11891:96326112
## 4: 50.69818:4404 60.05609:77404 70.06555:492721 71.06878:10967 72.08119:6452 96.16372:4730 98.06012:5392 99.09158:7022 107.7388:5017 112.08691:77599 113.07125:8681 114.10253:12101 115.08661:62883 116.07067:78051 118.41836:5492 129.71727:4949 130.09721:26590 133.09711:8301 140.08176:97906 143.08119:6815 157.10837:27301 158.09212:113444 175.11874:970866 176.12241:35772 185.10306:40925 194.12886:15717 195.11305:25603 210.12369:11867 212.13937:19653 213.12263:27584 230.1483:10720 254.16022:61830 254.24603:6216 255.14436:134534 256.14786:11544 272.17126:1426461
## 5: 55.0547:15463 60.05608:102951 66.50826:4355 67.29038:4059 70.06551:124004 72.08118:465248 73.08464:14998 88.07607:10784 97.07624:5000 106.08628:7751 112.08704:41433 113.07136:8966 114.10278:22531 115.08649:58367 116.0705:101542 116.07899:8226 130.0974:30870 133.09721:11452 142.09738:175608 143.0811:5341 157.1086:20760 158.09216:176379 158.10696:9209 159.09474:6943 169.13359:8064 175.11868:862378 176.12187:29410 185.10313:17329 197.1277:6440 203.02045:5161 211.15466:6815 212.13954:6743 214.15524:21786 215.13882:34314 230.19704:6247 232.16603:5204 239.14957:60627 240.13196:7829 256.17685:21141 256.26334:8640 257.16022:148428 258.1636:12321 274.18695:1731266 274.2738:160767
## 6: 56.04991:5069 60.04483:13243 71.19521:2821 74.06039:26183 86.06026:50609 88.03956:59341 88.09402:3001 102.0549:22071 104.07059:8826 114.04634:6071 114.05495:82077 114.06402:4340 115.0585:2801 118.08605:12960 132.06537:593407 133.069:18803 134.04456:9357 136.0717:2919 149.02286:6593 160.06017:1124401 161.06354:42334 163.80119:2754 175.11909:5712 195.02496:3848 247.09146:31492 270.28195:3253 270.89111:43685 288.90121:14245 292.13159:4401 293.09741:141391
## 7: 53.71065:3178 60.05614:156553 61.47846:2797 64.54952:2682 70.06559:542410 71.0495:8247 71.06894:10802 72.08135:4733 81.07046:3948 83.0856:3533 87.09188:4947 88.07629:7913 97.07616:13664 97.10152:3768 98.06018:12706 104.89079:3000 112.08708:113797 113.07162:3062 113.09048:4418 115.08681:28664 116.07069:30090 120.08089:62017 121.08438:3996 130.09737:11300 140.08163:29556 148.67773:3283 157.10822:48823 158.09244:43480 166.08607:12178 175.11884:327435 176.12074:10676 190.09668:21014 200.10699:8684 205.19719:2935 217.13278:5985 238.58571:3152 242.12854:11398 245.12872:5465 246.11201:19731 259.15604:19132 262.15506:53502 263.13818:69244 264.14264:6860 287.14981:3966 288.13364:26701 289.13629:4303 304.17535:5316 305.16046:204370 306.16229:20619 321.19998:3200 321.31458:7245 322.18716:917216
## 8: 55.63405:2420 70.11493:2221 71.05062:3480 71.79011:2389 75.04574:2884 91.69588:2441 99.04575:4876 106.44118:2846 116.24726:2947 116.84214:2390 172.08841:2806 213.25253:3002 223.85902:2529 231.76869:2711 237.5387:3531 243.08759:58808 244.09593:3014 377.09677:5826 377.14499:77042
## BK Time MinFeatValue meanValue sdValue minValue
## 1: 76339.444 50 323322 88177336.0 10629261.05 80670080
## 2: NA 50 109602 457388.3 24303.33 430812
## 3: 28955131.847 50 19862148 2159625941.3 232124477.95 1947554816
## 4: NA 50 330950 1194353.7 80408.71 1102820
## 5: 1628.953 50 6011 27667.0 14420.65 11415
## 6: 0.000 50 1661 0.0 0.00 0
## 7: 0.000 50 25579 0.0 0.00 0
## 8: 0.000 50 9857 0.0 0.00 0
## maxValue medianValue meanLog10Value sdLog10Value minLog10Value
## 1: 100340008 83521920 7.943731 0.05087158 7.907147
## 2: 478482 462871 5.685184 0.02196743 5.661067
## 3: 2407616256 2123706752 9.333729 0.04620032 9.290596
## 4: 1253602 1226639 6.105635 0.02781781 6.073923
## 5: 38933 32653 4.417139 0.26748493 4.111187
## 6: 0 0 2.618310 0.00000000 2.618310
## 7: 0 0 3.805824 0.00000000 3.805824
## 8: 0 0 3.391685 0.00000000 3.391685
## maxLog10Value medianLog10Value ctrl_meanValue ctrl_sdValue log2FC
## 1: 8.001824 7.922221 8.837948e+07 6.727066e+07 -0.003300611
## 2: 5.704050 5.690437 7.367430e+06 5.671251e+06 -3.931088706
## 3: 9.382482 9.328109 2.722777e+09 1.594957e+09 -0.333612641
## 4: 6.125917 6.117065 1.211139e+06 9.127339e+05 -0.018838030
## 5: 4.606766 4.533464 2.978967e+04 2.402780e+04 -0.101340058
## 6: 2.618310 2.618310 2.870047e+05 2.885599e+05 -9.434964153
## 7: 3.805824 3.805824 2.694659e+06 2.044094e+06 -8.722418853
## 8: 3.391685 3.391685 5.442400e+04 5.577482e+04 -4.528910170
## ctrlMeanTP1To4 ctrlSDTP1To4 log2FC_adj estimate estimate1 estimate2
## 1: 144402854.0 20678641.0 -0.7111049 -0.2433690 7.943731 8.187100
## 2: 10977001.8 552236.8 -4.5045805 -1.3605009 5.685184 7.045685
## 3: 3711423680.0 220317999.1 -0.7798068 -0.2305154 9.333729 9.564244
## 4: 3468554.4 251964.2 -1.4754824 -0.4433025 6.105635 6.548937
## 5: 3704056.4 267599.9 -6.9890744 -2.1379656 4.417139 6.555104
## 6: 1201953.8 352478.9 -11.4996120 -3.3953950 2.618310 6.013705
## 7: 4227198.2 221295.1 -9.3707789 -2.8272269 3.805824 6.633050
## 8: 217209.9 29914.0 -6.4780728 -1.9021723 3.391685 5.293857
## statistic p.value FDRCorrect
## 1: -4.892989 1.008098e-02 0.0780370841
## 2: -74.415340 1.990020e-07 0.0004074567
## 3: -8.394682 9.808226e-03 0.0765041646
## 4: -25.157460 2.292129e-04 0.0080224498
## 5: -13.812490 5.016011e-03 0.0500989411
## 6: -54.607742 3.351762e-04 0.0093499440
## 7: -263.875552 1.436125e-05 0.0024662376
## 8: -74.924454 1.780889e-04 0.0073246647
## Compound_Name
## 1:
## 2:
## 3: Massbank:PB000419 Arginine|2-amino-5-(diaminomethylideneamino)pentanoic acid
## 4: Spectral Match to Pro-Arg from NIST14
## 5: Spectral Match to Val-Arg from NIST14
## 6: Ethylenediaminetetraacetic acid EDTA
## 7: Spectral Match to Arg-Phe from NIST14
## 8: Massbank:RP013102 Riboflavin|7,8-dimethyl-10-[(2S,3S,4R)-2,3,4,5-tetrahydroxypentyl]benzo[g]pteridine-2,4-dione
## CF_kingdom CF_superclass
## 1: Organic compounds Organic acids and derivatives
## 2: no matches no matches
## 3: Organic compounds Organic acids and derivatives
## 4: Organic compounds Organic acids and derivatives
## 5: Organic compounds Organic acids and derivatives
## 6: Organic compounds Organic acids and derivatives
## 7: Organic compounds Organic acids and derivatives
## 8: Organic compounds Organoheterocyclic compounds
## CF_class CF_subclass
## 1: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 2: no matches no matches
## 3: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 4: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 5: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 6: Carboxylic acids and derivatives Tetracarboxylic acids and derivatives
## 7: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 8: Pteridines and derivatives Alloxazines and isoalloxazines
## CF_Dparent CF_superclass2
## 1: N-acyl-L-alpha-amino acids Organic acids and derivatives
## 2: no matches no matches
## 3: Arginine and derivatives Organic acids and derivatives
## 4: Dipeptides Organic acids and derivatives
## 5: Dipeptides Organic acids and derivatives
## 6: Tetracarboxylic acids and derivatives Organic acids and derivatives
## 7: Dipeptides Organic acids and derivatives
## 8: Flavins Organoheterocyclic compounds
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(MSI=="", FDRCorrect < 0.1)]##
## FALSE TRUE
## FALSE 95 38
## TRUE 3388 574
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", fisher.test(table(MSI != "", FDRCorrect < 0.1))]##
## Fisher's Exact Test for Count Data
##
## data: table(MSI != "", FDRCorrect < 0.1)
## p-value = 3.558e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.559209 3.512528
## sample estimates:
## odds ratio
## 2.360247
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1 & log2FC < 0 & MSI != ""]## IonMode AlignmentID IDUniq Strain AcConc TimePoint
## 1: Pos 1069 146.09215_7.621_1069_Pos 2243 1 6
## 2: Pos 1158 152.05757_6.515_1158_Pos 2243 1 6
## 3: Pos 1543 175.11842_9.472_1543_Pos 2243 1 6
## 4: Pos 2624 272.17151_9.39_2624_Pos 2243 1 6
## 5: Pos 2635 274.18716_9.265_2635_Pos 2243 1 6
## 6: Pos 2755 293.09756_9.69_2755_Pos 2243 1 6
## 7: Pos 2898 322.18713_8.665_2898_Pos 2243 1 6
## 8: Pos 3122 377.14465_6.11_3122_Pos 2243 1 6
## AvgMZ AvgRt Adduct MSI MetName Formula
## 1: 146.0922 7.621 [M+H]+ 1 4-Guanidinobutyric acid null
## 2: 152.0576 6.515 [M+H]+ 1 Guanine null
## 3: 175.1184 9.472 [M+H]+ 1 Arginine null
## 4: 272.1715 9.390 [M+H]+ 3 Pro-Arg C11H21N5O3
## 5: 274.1872 9.265 [M+H]+ 3 Val-Arg C11H23N5O3
## 6: 293.0976 9.690 [M+H]+ 3 Ethylenediaminetetraacetic acid C10H16N2O8
## 7: 322.1871 8.665 [M+H]+ 3 Arg-Phe C15H23N5O3
## 8: 377.1447 6.110 [M+H]+ 1 Riboflavin null
## INCHIKEY
## 1: TUHVEAJXIMEOSA-UHFFFAOYSA-N
## 2: UYTPUPDQBNUYGX-UHFFFAOYSA-N
## 3: ODKSFYDXXFIFQN-BYPYZUCNSA-N
## 4: HMNSRTLZAJHSIK-YUMQZZPRSA-N
## 5: IBIDRSSEHFLGSD-YUMQZZPRSA-N
## 6: KCXVZYZYPLLWCC-UHFFFAOYSA-N
## 7: PQBHGSGQZSOLIR-RYUDHWBXSA-N
## 8: AUNGANRZJHBGPY-SCRDCRAPSA-N
## MS/MS spectrum
## 1: 58.06606:72922 60.05321:66028 60.0569:599196 64.89349:41553 69.03465:55517 86.05511:114973 86.06149:1571713 87.03905:158058 87.04552:2257787 88.04843:97862 100.02277:85739 100.11342:78052 104.07082:613889 111.05553:232843 128.08371:426668 129.06721:102166 131.96037:44959 146.09225:3108444
## 2: 51.32462:2113 56.05073:3474 58.06278:4488 58.06643:23230 59.07427:28091 64.85785:2362 89.08133:2599 106.04996:4090 110.06185:3784 110.07248:3325 124.07721:3926 128.04654:4485 134.06007:20088 135.03186:5714 135.39624:2446 136.07767:3755 151.06358:7428 151.08888:10206 151.12454:3074 152.03471:5793 152.04799:61400 152.05885:237983 152.08395:5470 152.09494:7853 152.10806:4838 152.12015:4354
## 3: 60.05695:63174348 70.06654:80183040 71.06964:1397201 72.08225:5456357 75.7781:1277058 97.0774:1548582 112.08871:6907542 114.10384:1522407 115.08826:1615821 116.07243:63837528 117.07515:1738751 130.09753:24050896 131.1311:970662 153.58105:837189 157.10864:4184034 158.09486:21616900 159.09869:914629 175.09473:2739370 175.11891:96326112
## 4: 50.69818:4404 60.05609:77404 70.06555:492721 71.06878:10967 72.08119:6452 96.16372:4730 98.06012:5392 99.09158:7022 107.7388:5017 112.08691:77599 113.07125:8681 114.10253:12101 115.08661:62883 116.07067:78051 118.41836:5492 129.71727:4949 130.09721:26590 133.09711:8301 140.08176:97906 143.08119:6815 157.10837:27301 158.09212:113444 175.11874:970866 176.12241:35772 185.10306:40925 194.12886:15717 195.11305:25603 210.12369:11867 212.13937:19653 213.12263:27584 230.1483:10720 254.16022:61830 254.24603:6216 255.14436:134534 256.14786:11544 272.17126:1426461
## 5: 55.0547:15463 60.05608:102951 66.50826:4355 67.29038:4059 70.06551:124004 72.08118:465248 73.08464:14998 88.07607:10784 97.07624:5000 106.08628:7751 112.08704:41433 113.07136:8966 114.10278:22531 115.08649:58367 116.0705:101542 116.07899:8226 130.0974:30870 133.09721:11452 142.09738:175608 143.0811:5341 157.1086:20760 158.09216:176379 158.10696:9209 159.09474:6943 169.13359:8064 175.11868:862378 176.12187:29410 185.10313:17329 197.1277:6440 203.02045:5161 211.15466:6815 212.13954:6743 214.15524:21786 215.13882:34314 230.19704:6247 232.16603:5204 239.14957:60627 240.13196:7829 256.17685:21141 256.26334:8640 257.16022:148428 258.1636:12321 274.18695:1731266 274.2738:160767
## 6: 56.04991:5069 60.04483:13243 71.19521:2821 74.06039:26183 86.06026:50609 88.03956:59341 88.09402:3001 102.0549:22071 104.07059:8826 114.04634:6071 114.05495:82077 114.06402:4340 115.0585:2801 118.08605:12960 132.06537:593407 133.069:18803 134.04456:9357 136.0717:2919 149.02286:6593 160.06017:1124401 161.06354:42334 163.80119:2754 175.11909:5712 195.02496:3848 247.09146:31492 270.28195:3253 270.89111:43685 288.90121:14245 292.13159:4401 293.09741:141391
## 7: 53.71065:3178 60.05614:156553 61.47846:2797 64.54952:2682 70.06559:542410 71.0495:8247 71.06894:10802 72.08135:4733 81.07046:3948 83.0856:3533 87.09188:4947 88.07629:7913 97.07616:13664 97.10152:3768 98.06018:12706 104.89079:3000 112.08708:113797 113.07162:3062 113.09048:4418 115.08681:28664 116.07069:30090 120.08089:62017 121.08438:3996 130.09737:11300 140.08163:29556 148.67773:3283 157.10822:48823 158.09244:43480 166.08607:12178 175.11884:327435 176.12074:10676 190.09668:21014 200.10699:8684 205.19719:2935 217.13278:5985 238.58571:3152 242.12854:11398 245.12872:5465 246.11201:19731 259.15604:19132 262.15506:53502 263.13818:69244 264.14264:6860 287.14981:3966 288.13364:26701 289.13629:4303 304.17535:5316 305.16046:204370 306.16229:20619 321.19998:3200 321.31458:7245 322.18716:917216
## 8: 55.63405:2420 70.11493:2221 71.05062:3480 71.79011:2389 75.04574:2884 91.69588:2441 99.04575:4876 106.44118:2846 116.24726:2947 116.84214:2390 172.08841:2806 213.25253:3002 223.85902:2529 231.76869:2711 237.5387:3531 243.08759:58808 244.09593:3014 377.09677:5826 377.14499:77042
## BK Time MinFeatValue meanValue sdValue minValue
## 1: 76339.444 50 323322 88177336.0 10629261.05 80670080
## 2: NA 50 109602 457388.3 24303.33 430812
## 3: 28955131.847 50 19862148 2159625941.3 232124477.95 1947554816
## 4: NA 50 330950 1194353.7 80408.71 1102820
## 5: 1628.953 50 6011 27667.0 14420.65 11415
## 6: 0.000 50 1661 0.0 0.00 0
## 7: 0.000 50 25579 0.0 0.00 0
## 8: 0.000 50 9857 0.0 0.00 0
## maxValue medianValue meanLog10Value sdLog10Value minLog10Value
## 1: 100340008 83521920 7.943731 0.05087158 7.907147
## 2: 478482 462871 5.685184 0.02196743 5.661067
## 3: 2407616256 2123706752 9.333729 0.04620032 9.290596
## 4: 1253602 1226639 6.105635 0.02781781 6.073923
## 5: 38933 32653 4.417139 0.26748493 4.111187
## 6: 0 0 2.618310 0.00000000 2.618310
## 7: 0 0 3.805824 0.00000000 3.805824
## 8: 0 0 3.391685 0.00000000 3.391685
## maxLog10Value medianLog10Value ctrl_meanValue ctrl_sdValue log2FC
## 1: 8.001824 7.922221 8.837948e+07 6.727066e+07 -0.003300611
## 2: 5.704050 5.690437 7.367430e+06 5.671251e+06 -3.931088706
## 3: 9.382482 9.328109 2.722777e+09 1.594957e+09 -0.333612641
## 4: 6.125917 6.117065 1.211139e+06 9.127339e+05 -0.018838030
## 5: 4.606766 4.533464 2.978967e+04 2.402780e+04 -0.101340058
## 6: 2.618310 2.618310 2.870047e+05 2.885599e+05 -9.434964153
## 7: 3.805824 3.805824 2.694659e+06 2.044094e+06 -8.722418853
## 8: 3.391685 3.391685 5.442400e+04 5.577482e+04 -4.528910170
## ctrlMeanTP1To4 ctrlSDTP1To4 log2FC_adj estimate estimate1 estimate2
## 1: 144402854.0 20678641.0 -0.7111049 -0.2433690 7.943731 8.187100
## 2: 10977001.8 552236.8 -4.5045805 -1.3605009 5.685184 7.045685
## 3: 3711423680.0 220317999.1 -0.7798068 -0.2305154 9.333729 9.564244
## 4: 3468554.4 251964.2 -1.4754824 -0.4433025 6.105635 6.548937
## 5: 3704056.4 267599.9 -6.9890744 -2.1379656 4.417139 6.555104
## 6: 1201953.8 352478.9 -11.4996120 -3.3953950 2.618310 6.013705
## 7: 4227198.2 221295.1 -9.3707789 -2.8272269 3.805824 6.633050
## 8: 217209.9 29914.0 -6.4780728 -1.9021723 3.391685 5.293857
## statistic p.value FDRCorrect
## 1: -4.892989 1.008098e-02 0.0780370841
## 2: -74.415340 1.990020e-07 0.0004074567
## 3: -8.394682 9.808226e-03 0.0765041646
## 4: -25.157460 2.292129e-04 0.0080224498
## 5: -13.812490 5.016011e-03 0.0500989411
## 6: -54.607742 3.351762e-04 0.0093499440
## 7: -263.875552 1.436125e-05 0.0024662376
## 8: -74.924454 1.780889e-04 0.0073246647
## Compound_Name
## 1:
## 2:
## 3: Massbank:PB000419 Arginine|2-amino-5-(diaminomethylideneamino)pentanoic acid
## 4: Spectral Match to Pro-Arg from NIST14
## 5: Spectral Match to Val-Arg from NIST14
## 6: Ethylenediaminetetraacetic acid EDTA
## 7: Spectral Match to Arg-Phe from NIST14
## 8: Massbank:RP013102 Riboflavin|7,8-dimethyl-10-[(2S,3S,4R)-2,3,4,5-tetrahydroxypentyl]benzo[g]pteridine-2,4-dione
## CF_kingdom CF_superclass
## 1: Organic compounds Organic acids and derivatives
## 2: no matches no matches
## 3: Organic compounds Organic acids and derivatives
## 4: Organic compounds Organic acids and derivatives
## 5: Organic compounds Organic acids and derivatives
## 6: Organic compounds Organic acids and derivatives
## 7: Organic compounds Organic acids and derivatives
## 8: Organic compounds Organoheterocyclic compounds
## CF_class CF_subclass
## 1: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 2: no matches no matches
## 3: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 4: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 5: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 6: Carboxylic acids and derivatives Tetracarboxylic acids and derivatives
## 7: Carboxylic acids and derivatives Amino acids, peptides, and analogues
## 8: Pteridines and derivatives Alloxazines and isoalloxazines
## CF_Dparent CF_superclass2
## 1: N-acyl-L-alpha-amino acids Organic acids and derivatives
## 2: no matches no matches
## 3: Arginine and derivatives Organic acids and derivatives
## 4: Dipeptides Organic acids and derivatives
## 5: Dipeptides Organic acids and derivatives
## 6: Tetracarboxylic acids and derivatives Organic acids and derivatives
## 7: Dipeptides Organic acids and derivatives
## 8: Flavins Organoheterocyclic compounds
######## Present/absent plot
summary_data[,PresentInCtrls:=ifelse(ctrlMeanTP1To4 > 5e4, 1, 0)]
summary_data[,PresentInCtrls2:=ifelse(ctrlMeanTP1To4 > 3*BK, 1, 0)]
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(PresentInCtrls ==1)]##
## FALSE TRUE
## 427 3668
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(PresentInCtrls2 ==1 & meanValue > 3*BK)]##
## FALSE TRUE
## 728 3361
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(ctrlMeanTP1To4 > 1e5)]##
## FALSE TRUE
## 682 3413
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(ctrlMeanTP1To4 > 1e4)]##
## FALSE TRUE
## 265 3830
summary_data[,Present:=ifelse(meanValue > 3*BK, 1, 0)]
present_in_ctrls_summary_plot <- ggplot(summary_data[Present == 1 & !is.na(FDRCorrect) & Strain == "2243" & AcConc==1],
aes(x = factor(Time), fill = factor(ifelse(PresentInCtrls2==0, "Absent in controls", "Present in controls")))) +
geom_bar(stat = "count", position = "stack") + facet_grid(ifelse(MetName %in% c("unknown", "Unknown"), "Unidentified", "Identified")~ifelse(IonMode == "Neg", "Negative Mode", 'Positive Mode'), scales = "free_y") +
scale_y_continuous(name = "Number of features", expand = c(0,0)) + theme_classic2() + xlab("Time point (hr)") + scale_fill_brewer(palette = "Set1", name = "") +
theme(axis.text.x = element_text(size=8))
ggsave(present_in_ctrls_summary_plot, file = paste0(outdir, "presentAbsentCtrlsPlotUpdated.pdf"), width = 6, height = 3.6)
present_in_ctrls_summary_plot############## Diff abundance plot
summary_data[,DiffAbund:=case_when(
FDRCorrect < 0.1 & log2FC_adj > 0 ~ "Produced",
FDRCorrect < 0.1 & log2FC_adj < 0 ~ "Depleted",
TRUE ~ "None"
)]
diff_abund_summary_plot <- ggplot(summary_data[!is.na(FDRCorrect) & Strain == "2243" & AcConc==1 & DiffAbund != "None"], aes(x = factor(Time), fill = factor(DiffAbund, levels = c("Produced", "Depleted", "None")))) +
geom_bar(stat = "count", position = "stack") + facet_grid(ifelse(MetName %in% c("unknown", "Unknown"), "Unidentified", "Identified")~IonMode, scales = "free_y") +
scale_y_continuous(name = "Number of features", expand = c(0,0)) + theme_classic2() + xlab("Time point (hr)") + scale_fill_brewer(palette = "Set1", name = "") +
theme(axis.text.x = element_text(size=8)) + scale_x_discrete(limits = summary_data[,factor(sort(unique(Time)))])
ggsave(diff_abund_summary_plot, file = paste0(outdir, "DiffAbundCtrlsPlotUpdated.pdf"), width = 5.5, height = 3.6)
diff_abund_summary_plotdiff_abund_mets <- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & !is.na(p.value) & abs(log2FC_adj) > 0.75 & FDRCorrect < 0.1, IDUniq]
############################ Heatmap
summary_data[AcConc==1 & Strain == "2243" ,logZscore:=scale(meanLog10Value), by = IDUniq]
mean_data_wide <- dcast(summary_data[AcConc==1 & Strain == "2243" & IDUniq %in% diff_abund_mets], IDUniq+MetName+MSI~Time, value.var = "logZscore")
row_labs <- mean_data_wide[,ifelse(MetName %in% c("Unknown", "unknown"), "", paste0(MetName, " (", MSI, ")"))]
pdf(paste0(outdir, "heatmap_timeCourse_v2.pdf"), width = 5, height = 8)
Heatmap(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F]), cluster_columns = F,
row_labels = row_labs, row_names_gp = gpar(fontsize= 8), col = c(brewer.pal(3, "Set1")[2], "white", brewer.pal(3, "Set1")[1]))
dev.off()## png
## 2
Heatmap(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F]), cluster_columns = F,
row_labels = row_labs, row_names_gp = gpar(fontsize= 8), col = c(brewer.pal(3, "Set1")[2], "white", brewer.pal(3, "Set1")[1]))diff_abund_id_mets<- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & abs(log2FC_adj) > 1 & FDRCorrect < 0.1 & !grepl("known", MetName), IDUniq]Figure S1 various media analyses
####### Figure S1
named_mets <- data.table(MetName = summary_data[!grepl("known", MetName),sort(unique(MetName))])
named_mets[c(29, 31, 41, 48, 57, 63, 64, 68, 94, 100, 101, 103, 104, 105, 107, 109,
115, 118, 125, 126, 127, 131), MediaComponent:=1]
named_mets[c(29, 31, 41, 48, 57, 63, 64, 68, 94, 100, 101, 103, 104, 105, 107, 109,
115, 118, 125, 126, 127, 131), Category:=c("Amino acids", "Vitamins and Cofactors", "Other", rep("Amino acids", 5),
"Vitamins and Cofactors", "Amino acids", "Other", "Amino acids",
rep("Vitamins and Cofactors",3), "Amino acids", "Vitamins and Cofactors",
rep("Amino acids", 3), "Other", "Amino acids")]
summary_data <- merge(summary_data, named_mets, by = "MetName", all.x = T)
mean_data_wide2 <- dcast(summary_data[AcConc==1 & Strain == "2243"], IDUniq+MetName~Time, value.var = "meanLog10Value")
row_labs <- mean_data_wide2[,ifelse(MetName %in% c("Unknown", "unknown"), "", MetName)]
media_component_plot <- summary_data[MediaComponent==1 & AcConc == 1 & Strain=="2243"] %>%
complete(MetName, IonMode, Time) %>% ggplot(aes(y = factor(MetName, levels = rev(sort(named_mets[,MetName]))), x = factor(Time), fill = meanLog10Value)) + facet_grid(.~IonMode, scales = "free") +
geom_tile() + scale_fill_gradient(low = "white", high = brewer.pal(8, "Purples")[8], na.value = "gray80") +
xlab("Time (hr)") + ylab("")
ggsave(media_component_plot, file = paste0(outdir, "media_components_metabolomics.pdf"), width=7, height = 5.5)
media_component_plot####### compare with Sonnenburg and Bisanz datasets
## Quick look at sonnenburg data
invitro_metadata <- fread("../sonnenburg_in_vitro_metadata.txt")
samples_interest <- invitro_metadata[grepl("Eggerthella", taxonomy)]
invitro_data <- fread("../sonnenburg_in_vitro_data.txt", fill = T)
invitro_sub <- invitro_data[V1 %in% samples_interest[,V1]]
invitro_sub <- melt(invitro_sub, id.var = "V1", variable.name = "compound")
invitro_sub <- invitro_sub[!is.na(value)]
invitro_summary <- invitro_sub[,list(median(value), mean(value), sd(value)), by=compound]
setnames(invitro_summary, c("V1", "V2", "V3"), c("medianLog2FC", "mean_log2FC", "sd_log2FC"))
invitro_summary[,Prod:=ifelse(medianLog2FC > 0.5, 1, 0)]
invitro_summary[,Deplete:=ifelse(medianLog2FC < -0.5, 1, 0)]
invitro_summary[,Outcome:=case_when(
Prod==1 ~ "Produced",
Deplete==1 ~ "Depleted",
TRUE ~"Unchanged"
)]
summary_data_sub <- summary_data[Strain == "2243" & AcConc==1]
summary_data_sub[,Prod:=ifelse(FDRCorrect < 0.1 & log2FC_adj > 0.5, 1, 0)]
summary_data_sub[,Deplete:=ifelse(FDRCorrect < 0.1 & log2FC_adj < -0.5, 1, 0)]
summary_data_sub[,Outcome:=case_when(
Prod==1 ~ "Produced",
Deplete==1 ~ "Depleted",
TRUE ~"Unchanged"
)]
prev_data <- fread("../../ElentaStrainMetabolomics/DiffAbundAllSummary_filtered_finalNorm.tsv")
prev_data[,Prod2:=ifelse(T_FDR_pvalue < 0.1 & log2FC > 0.5, 1, 0)]
prev_data[,Deplete2:=ifelse(T_FDR_pvalue < 0.1 & log2FC < -0.5, 1, 0)]
prev_data[,Outcome:=case_when(
Prod2==1 ~ "Produced",
Deplete2==1 ~ "Depleted",
TRUE ~"Unchanged"
)]
prev_data_sub <- prev_data[SampleID == "Eggerthella_lenta_UCSF2243"]
prev_data_sub[,MetName:=DB_Compound_Name1]
prev_data_sub[is.na(MetName), MetName:="Unknown"]
invitro_summary[,MetName:=compound]
counts_all <- rbind(
data.table(summary_data_sub[TimePoint == 6,list(IDUniq, MetName, Outcome)], Dataset = "EDM1"),
data.table(prev_data_sub[,list(FeatureID, MetName, Outcome)], Dataset = "Bisanz et al\n(ISP-2+Arg)"),
data.table(invitro_summary[,list(compound, Outcome)], Dataset = "Han et al\n(Mega)"), fill = T
)
counts_all[,Dataset:=factor(Dataset, levels = c(
"Bisanz et al\n(ISP-2+Arg)",
"Han et al\n(Mega)",
"EDM1"
))]
# blue = HSL 7, 2, 255
# red = rgb 255, 4, 1
count1 <- ggplot(counts_all[Outcome != "Unchanged"], aes(x = Dataset, fill = Outcome)) + geom_bar(position = "dodge") + xlab("In vitro metabolomics dataset") +
ylab("Count") + scale_fill_manual(values = brewer.pal(3, "Set1")[c(2,1)]) + theme(axis.text = element_text(color = "black", size = 8)) +
scale_y_continuous(expand = c(0,0))
count2 <- ggplot(counts_all[!grepl("known", MetName) & Outcome != "Unchanged"], aes(x = Dataset, fill = Outcome)) +
geom_bar(position = position_dodge(preserve = "single")) + xlab("In vitro metabolomics dataset")+ ylab("Number of identified metabolite features") +
scale_fill_manual(values = brewer.pal(3, "Set1")[c(2,1)], name = "")+ theme(axis.text = element_text(color = "black", size = 8)) + scale_y_continuous(expand = c(0,0))
compare_plot <- count1+guides(fill = "none") + count2
ggsave(compare_plot, file = paste0(outdir, "compare_studies_plot.pdf"), width = 6, height = 3)
compare_plot############## Trajectories Figure S1E
hclust_results <- hclust(dist(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F])))
clusts <- cutree(hclust_results, h = 1.6)
clust_table <- data.table(IDUniq = mean_data_wide[,IDUniq], Clust = clusts)
plot_dat <- summary_data[AcConc==1 & Strain == "2243" & IDUniq %in% diff_abund_mets]
plot_dat <- merge(plot_dat, clust_table, by = "IDUniq", all.x = T)
plot_dat[,Clust2:=factor(paste0("Cluster ", Clust), levels = c(paste0("Cluster ", 1:20)))]
gnps_color_scale <- c(brewer.pal(12, "Set3")[c(1:2,4:6)], "gray40")
names(gnps_color_scale) <- c(summary_data[CF_superclass2 != "no matches", sort(unique(CF_superclass2))], "no matches")
good_clusts <- plot_dat[,list(length(unique(IDUniq)), length(unique(MetName[!grepl("known", MetName)]))), by = Clust2][V1 > 4 & V2 > 0, Clust2]
trajectory_plot <- ggplot(plot_dat[CF_superclass2 == "no matches" & Clust2 %in% good_clusts],
aes(x = Time, y = logZscore, group = IDUniq, color = CF_superclass2)) +
geom_line(alpha = 0.5) + geom_line(data = plot_dat[CF_superclass2 != "no matches"& Clust2 %in% good_clusts]) +
facet_grid(.~factor(Clust2, levels = good_clusts)) + scale_color_manual(values= gnps_color_scale, name = "Chemical superclass") +
xlab("Time (hr)") + ylab("Scaled log peak height") #+
clust_tab2 <- plot_dat[MSI != "",list(IDUniq, MetName, MSI, Clust)] %>% distinct()
clust_tab2[,DisplayName:=paste0(MetName, " (", MSI, ")")]
clust_tab2[,CompRank:=rank(MSI, MetName, ties.method = "first"), by = Clust]
clust_tab2[,CompRank:=max(CompRank)-CompRank]
clust_tab2[,Clust2:=factor(paste0("Cluster ", Clust), levels = good_clusts)]
clust_labels <- ggplot(clust_tab2[Clust2 %in% good_clusts], aes(x = 1, y = CompRank, label = DisplayName)) + geom_text(size = 3) + facet_grid(~Clust2, drop = F) + theme_nothing()
trajectory_plot_all <- trajectory_plot + clust_labels + plot_layout(nrow = 2, heights = c(1, 1.3))
ggsave(trajectory_plot_all, file = paste0(outdir, "hclust_trajectories.pdf"), width = 11, height = 3.5)
trajectory_plot_allrm(list = ls())
### Reanalyze all growth experiments
library(data.table)
library(tidyverse)
library(growthcurver)
library(cowplot)
library(patchwork)
library(readxl)
library(RColorBrewer)
library(ggpubr)
library(ggbeeswarm)
source("scripts/growth_curve_functions.R")##
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
##
## stamp
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.8.0 ggbeeswarm_0.6.0 growthcurver_0.3.1
## [4] KEGGREST_1.32.0 patchwork_1.1.1 ggrepel_0.9.1
## [7] ComplexHeatmap_2.13.1 ggpubr_0.4.0 RColorBrewer_1.1-2
## [10] fastcluster_1.2.3 vegan_2.6-2 lattice_0.20-45
## [13] permute_0.9-5 webchem_1.1.2 forcats_0.5.1
## [16] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
## [19] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [22] tidyverse_1.3.1 readxl_1.3.1 cowplot_1.1.1
## [25] ggplot2_3.3.6 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.34.0
## [7] GlobalOptions_0.1.2 fs_1.5.0 clue_0.3-61
## [10] rstudioapi_0.13 farver_2.1.0 fansi_1.0.3
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.1.1
## [16] doParallel_1.0.17 knitr_1.39 jsonlite_1.8.0
## [19] Cairo_1.6-0 broom_0.8.0 cluster_2.1.3
## [22] dbplyr_2.1.1 png_0.1-7 data.tree_1.0.0
## [25] compiler_4.1.1 httr_1.4.2 backports_1.2.1
## [28] assertthat_0.2.1 Matrix_1.4-1 fastmap_1.1.0
## [31] cli_3.3.0 htmltools_0.5.2 tools_4.1.1
## [34] gtable_0.3.0 glue_1.6.2 GenomeInfoDbData_1.2.7
## [37] Rcpp_1.0.8.3 carData_3.0-5 cellranger_1.1.0
## [40] jquerylib_0.1.4 vctrs_0.4.1 Biostrings_2.62.0
## [43] nlme_3.1-159 iterators_1.0.13 xfun_0.31
## [46] rvest_1.0.2 lifecycle_1.0.0 rstatix_0.7.0
## [49] zlibbioc_1.40.0 MASS_7.3-57 scales_1.1.1
## [52] hms_1.1.1 parallel_4.1.1 yaml_2.3.5
## [55] sass_0.4.1 stringi_1.7.6 highr_0.9
## [58] S4Vectors_0.32.4 foreach_1.5.2 BiocGenerics_0.40.0
## [61] shape_1.4.6 GenomeInfoDb_1.30.1 rlang_1.0.2
## [64] pkgconfig_2.0.3 matrixStats_0.62.0 bitops_1.0-7
## [67] evaluate_0.15 labeling_0.4.2 tidyselect_1.1.2
## [70] magrittr_2.0.3 R6_2.5.1 magick_2.7.3
## [73] IRanges_2.28.0 generics_0.1.0 DBI_1.1.1
## [76] pillar_1.7.0 haven_2.5.0 withr_2.5.0
## [79] mgcv_1.8-40 abind_1.4-5 RCurl_1.98-1.7
## [82] modelr_0.1.8 crayon_1.4.1 car_3.0-13
## [85] utf8_1.2.2 tzdb_0.3.0 rmarkdown_2.14
## [88] GetoptLong_1.0.5 reprex_2.0.1 digest_0.6.29
## [91] stats4_4.1.1 munsell_0.5.0 beeswarm_0.4.0
## [94] vipor_0.4.5 bslib_0.3.1
main_dir <- "../GrowthExperiments/"
results_tables <- data.table(read_xlsx(paste0(main_dir, "allGrowthCurveData2021-06-08.xlsx")))## New names:
## • `` -> `...8`
results_tables <- results_tables[!is.na(ResultsFile)]
outdir <- "fig_s3/"
dir.create(outdir)
all_data <- rbindlist(lapply(1:nrow(results_tables), function(x){
data <- read_growthcurve_file(paste0(results_tables[x, Directory], "/", results_tables[x, ResultsFile]))
data[,Experiment:=results_tables[x, Experiment]]
return(data)
}), fill = T)
all_layouts <- rbindlist(lapply(1:nrow(results_tables), function(x){
if(grepl("xlsx", results_tables[x, LayoutFile])){
if(!is.na(results_tables[x, as.numeric(LayoutSheet)])){
layout <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]),
sheet = results_tables[x, as.numeric(LayoutSheet)]))
} else {
layout1 <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]),
sheet = 1))
layout2 <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]),
sheet = 2))
layout1[,Type:="Media"]
layout2[,Type:="Strain"]
layout <- rbind(layout1[!is.na(Row)], layout2[!is.na(Row)], fill = T)
}
} else {
layout <- fread(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]), header = T)
}
if(names(layout)[1] %in% c("...1", "V1")){ setnames(layout, names(layout)[1], "Row")}
if(!"Type" %in% names(layout)){
if(ncol(layout) > 13) layout <- layout[,1:13, with=F]
layout[,Type:=ifelse(is.na(Row)|Row == "", "Strain", "Media")]
layout[,Row:=sapply(1:nrow(layout), function(x){
if(!is.na(layout[x, Row]) & layout[x,Row] != ""){
return(layout[x,Row])
} else {
return(layout[x-1, Row])
}
})]
}
layout <- melt(layout, id.var = c("Row", "Type"), variable.name = "Column")
layout <- dcast(layout[!is.na(Row)], Row+Column~Type, value.var = "value")
layout[,Experiment:=results_tables[x, Experiment]]
return(layout)
}), fill = T)## New names:
## • `` -> `...14`
## • `` -> `...15`
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...14`
## • `` -> `...15`
## AcArgUracilRflv and AcMinRflvFollowUps have errors in the strain id here
all_layouts[Experiment=="AddCarboxylicAcids" & Column %in% c(2,6,10), Strain:="c"]
## Fix inconsistent layout issues
all_layouts[grepl("_c$", Media), Strain:="c"]
all_layouts[grepl("_c$", Media), Media:=gsub("_c$", "", Media)]
all_layouts[Experiment %in% c("AminoAcidLOOs", "GMM_LAB_Components2") & grepl("c$", Media), Strain:="c"]
all_layouts[Experiment %in% c("AminoAcidLOOs", "GMM_LAB_Components2") & grepl("c$", Media), Media:=gsub("c$", "", Media)]
all_layouts[!is.na(Media) & Media != "" & is.na(Strain), Strain:="2243"]
all_layouts[Strain == "AB8", Strain:="AB8n2"]
all_layouts[Strain == "Val", Strain:="Valencia"]
all_layouts <- all_layouts[!is.na(Media) & !is.na(Strain)]
all_layouts <- all_layouts[Media != "" & Strain != ""]
aa_names <- data.table(read_xlsx(paste0(results_tables[Experiment == "AminoAcidLOOs", Directory], "/",
results_tables[Experiment == "AminoAcidLOOs", LayoutFile]),
sheet = results_tables[Experiment == "AminoAcidLOOs", OtherInfoSheet]))
aa_names[,ConditionID:=as.character(ConditionID)]
all_layouts <- merge(all_layouts, aa_names, by.x = "Media", by.y = "ConditionID", all.x = T)
all_layouts[!is.na(Condition), Media:=Condition]
all_layouts[,Condition:=NULL]
all_layouts[,well:=paste0(Row, Column)]
all_layouts <- all_layouts[!(Experiment == "MineralLOOs" & well == "B8")]
all_layouts[Experiment == "AddCarboxylicAcids" & Column %in% c(2, 6, 10), Strain:="c"]
all_layouts[Experiment == "AcArgUracilRflv" & Media == "DM1_50Arg_10Ac", Media:="DM1_50Arg_0Ac"]
all_layouts[Experiment == "AcArgUracilRflv" & Media == "DM1_50Arg_1Ac", Media:="DM1_50Arg_10Ac"]
all_layouts[Experiment == "AcArgUracilRflv", ArgConc:=as.numeric(ifelse(grepl("Arg", Media), gsub("DM1_", "", gsub("Arg.*$", "", Media)), 100))]
all_layouts[Experiment == "AcArgUracilRflv", AcConc:=as.numeric(ifelse(grepl("Ac", Media), gsub("DM1_", "", gsub("[0-9]+Arg_", "", gsub("Ac.*$", "", Media))), 100))]
all_layouts[,BaseDM1Condition:=ifelse(Media %in% c("DM1", "DM1b", "DM1Ac10", "DM1_Old", "None-base", "GL", "GLHmOld", "DM1_v2", "DM1_VitMix", "Ac1"), "DM1", "Other")]
all_data[Experiment == "AcArgUracilRflv", time:=time+25.5]
growth_data_long <- melt(all_data, id.var = c("time", "temp", "Experiment"), variable.name = "well")
growth_data_long <- merge(growth_data_long, all_layouts, by = c("well", "Experiment"), all.x = T)
growth_data_long <- growth_data_long[!is.na(Media) & !is.na(value)] #wells without metadata were empty
setnames(all_layouts, "Media", "Condition")
setnames(growth_data_long, "Media", "Condition")
growth_data_long[Strain == 'c' & time < 28 & Experiment != "GLUACvariants" & value > 0.17, value:=NA] #remove weird early readings
bad_blank_wells <- growth_data_long[Strain == "c" & (time > 28 & Experiment != "GLUACvariants"), length(value[value > 0.25]), by = list(well, Experiment)][V1 > 10]
bad_blank_wells2 <- growth_data_long[,length(value[time < 4 & value > 0.3& Experiment != "GLUACvariants"]), by=list(well, Experiment)][V1 > 1]
bad_blank_wells3 <- growth_data_long[Strain == "c", (median(value[time > (max(time)-10)], na.rm = T)-median(value[time < 10])), by = list(well, Experiment)][V1 > 0.06]
bad_blank_wells <- unique(rbind(bad_blank_wells[,list(well, Experiment)], bad_blank_wells2[,list(well, Experiment)], bad_blank_wells3[,list(well, Experiment)]))
bad_blank_wells[,BadBlank:=1]
growth_data_long <- merge(growth_data_long, bad_blank_wells[,list(Experiment, well, BadBlank)], by = c("Experiment", "well"), all.x = T)
all_layouts <- merge(all_layouts, bad_blank_wells[,list(Experiment, well, BadBlank)], by = c("Experiment", "well"), all.x = T)
blank_wells1 <- all_layouts[Strain=="c" & is.na(BadBlank), list(Experiment, well, Condition)]
### Normalize all data
corrected_data_all <- rbindlist(lapply(1:nrow(results_tables), function(j){
exp <- results_tables[j, Experiment]
blank_wells <- blank_wells1[Experiment == exp, well]
names(blank_wells) <- blank_wells1[Experiment == exp, Condition]
corrected_data <- correct_growth_data_custom(growth_data_long[Experiment == exp], blank_wells = blank_wells,
method = "blankTimeMatch", wide_form = F)
corrected_data[,Experiment:=exp]
}), fill = T)## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
problem_wells <- corrected_data_all[,sum(NewValue < 0), by=list(Experiment, well)][V1 > 0]
corrected_data_all[Experiment == "GLUACvariants",NewValue:=NewValue - min(NewValue, na.rm = T), by = list(Experiment, well)]
corrected_data_all[,NewValue:=NewValue - min(NewValue, na.rm = T), by = list(Experiment, well)]
### Reassign one sub-experiment
all_layouts[Experiment == "AgmatineCombinations" & grepl("old", Condition, ignore.case = T), Experiment:="DM1OldB12Old"]
corrected_data_all[Experiment == "AgmatineCombinations" & well %in% all_layouts[Experiment == "DM1OldB12Old", well], Experiment:="DM1OldB12Old"]
growth_data_norm <- dcast(corrected_data_all, Experiment+time~well, value.var = "NewValue", fill = NA)
experiment_list <- corrected_data_all[,sort(unique(Experiment))]
trim_after_times <- rep(52, length(experiment_list))
trim_after_times[which(experiment_list == "AcArgUracilRflv")] <- 64
trim_after_times[which(experiment_list == "AcetateDosesStrains2")] <- 44
trim_after_times[which(experiment_list == "AcetateDosesStrains")] <- 40
## Fit logistic models
model_fits <- rbindlist(lapply(1:length(experiment_list), function(x) {
exp <- experiment_list[x]
foo <- fit_growthcurver_models(growth_data_norm[Experiment == exp, 2:ncol(growth_data_norm), with=F],
indiv_fits = T, trim_after = trim_after_times[x]) #, nrow(growth_data_norm[Experiment == exp & time < 0.5]
foo[,Experiment:=exp]
return(foo)
}), fill = T)
model_fits <- merge(model_fits, all_layouts[,list(Experiment, well, Strain, Condition, BaseDM1Condition)], by.x = c("Experiment", "Sample"), by.y = c("Experiment", "well"), all.x = F)
model_fits[,Category:=gsub("[0-9]$", "", Experiment)]
## Calculate Percent of base version
base_models <- model_fits[BaseDM1Condition == "DM1"]
base_models[,BadFit:=ifelse(k > 1 | sigma > 0.1, 1, 0)]
base_model_summary <- base_models[BadFit==0 & Strain == "2243",list(1/mean(sapply(r[!is.na(r)], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0), mean(t_mid), sd(t_mid), mean(t_gen), sd(t_gen), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e)), by=list(Strain, Experiment)]
## Split "old' comparison into its own experiment
bad_base <- c("AminoAcidLOOs", "GLUACvariants", "GMM_LAB_Components2")
mean_base_good <- base_model_summary[!Experiment %in% bad_base,
lapply(.SD, mean), .SDcols=paste0("V", seq(1,13,by=2))]
model_fits_compare <- merge(model_fits[Strain != "c"], base_model_summary[!Experiment %in% bad_base], by = "Experiment", all.x = T)
model_fits_compare[is.na(V1), V1:=mean_base_good[,V1]]
model_fits_compare[is.na(V3), V3:=mean_base_good[,V3]]
model_fits_compare[is.na(V5), V5:=mean_base_good[,V5]]
model_fits_compare[is.na(V7), V7:=mean_base_good[,V7]]
model_fits_compare[is.na(V9), V9:=mean_base_good[,V9]]
model_fits_compare[is.na(V11), V11:=mean_base_good[,V11]]
model_fits_compare[is.na(V13), V13:=mean_base_good[,V13]]
model_fits_compare[,FractionAUC_e:=auc_e/V13]
model_fits_compare[,FractionAUC_l:=auc_l/V11]
loo_dat <- model_fits[Experiment %in% c("AminoAcidLOOs", "MineralLOOs", "VitaminLOOs", "VitaminLOOs2", "VitaminLOOs3", "DM1OldB12Old", "AminoAcidLOORedo")| Experiment %in%
c("AgmatineCombinations", "DM1OldB12Old")|(Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil")|(Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0")))]
condition_info <- unique(loo_dat[,list(Experiment, BaseDM1Condition, Condition)])
condition_info[BaseDM1Condition == "DM1", LOO:="DM1"]
condition_info[is.na(LOO),LOO:=c("Uracil", "Acetate", rep(NA, 12), "B12", "Arg", "Leu", "Lys", "Cys","His", "Ile", "Met","Trp", "Tyr",
"Phe","Ser", "Thr", "Val", "Pro",NA,
"Acetate",NA, NA, NA,
"Arg", rep(NA, 4), "His", "Cys", "Ile", "Lys", "Leu", "Met", "Ser", "Phe", "Thr", "Tyr","Trp", "Val", "Pro", "B12",
"Ni", "Zn", "H3BO3", "WO4", "Mg", "Mn", "Mo", "Fe", "Ca", "HCO3", "NO3", "Co", "Cu", NA, NA,NA, "Ascorbic acid", "B12", NA,
"NAD+", "Pyridoxamine", "Pantothenate", "Folate", "Menadione", "Thiamine","Biotin", "Riboflavin",
"Hemin", "p-aminobenzoate", NA, NA, "Ascorbic acid", "B12", NA, "NAD+", "Pyridoxamine", "Pantothenate", "Folate", "Menadione",
"Thiamine", "Biotin", "Riboflavin", "Hemin", "p-aminobenzoate", NA, NA, NA, "p-aminobenzoate", "Pantothenate", NA, "NAD+", "B12",
"Thiamine", NA, "Biotin", "Riboflavin", "Hemin", NA, NA, NA)]
condition_info <- condition_info[!(Experiment == "AminoAcidLOOs" & LOO == "Acetate")]
condition_info[!is.na(LOO),LOOCategory:=c("Other", "Other", "Other", "Other","Vitamins", "Vitamins", rep("Amino acids", 15),
rep("Amino acids", 15), "Vitamins", "Vitamins",
rep("Minerals", 14), rep("Vitamins", 35))]
condition_info[LOO=="DM1", LOOCategory:="DM1"]
condition_info[,LOOCategory:=factor(LOOCategory, levels = c("DM1", "Amino acids", "Vitamins", "Minerals", "Other"))]
levels(condition_info$LOOCategory) <- c("Full DM1", "Amino acid dropouts", "Vitamin dropouts", "Mineral dropouts", "Other dropouts")
corrected_data_all[,Time2:=round(time, digits = 1)]
corrected_data_all[,Condition2:=ifelse(Condition == "DM1", paste0(Condition, "_", Experiment), Condition)]
corrected_data_all[,Condition3:=paste0(Condition, "_", Experiment)]
average_dat <- corrected_data_all[Strain == "2243" & Experiment != "VitaminLOOs2" & (grepl("LOO", Experiment)|
Experiment %in% c("AgmatineCombinations", "DM1OldB12Old")|
(Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil"))|
(Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0"))),
list(mean(NewValue), sd(NewValue)/sqrt(length(NewValue))),
by = list(Experiment,Condition2, Condition, Strain, Time2)]
average_dat_sub <- merge(average_dat, unique(condition_info[!is.na(LOO)]), by =c("Condition", "Experiment"), all.x = F)
setnames(average_dat_sub, "Time2", "time")
model_fits_compare <- merge(model_fits_compare, condition_info, by = c("Experiment", "Condition"), all.x = T)
loo_order2 <- model_fits_compare[!is.na(LOO),median(FractionAUC_l), by=LOO][order(V1), LOO]
model_fits_compare[,LOO:=factor(LOO, levels = loo_order2)] #rev(loo_colors)
model_fits_compare <- model_fits_compare[Strain.x == "2243"]
model_fits_compare[,Category2:=factor(gsub(" dropouts", "s", as.character(LOOCategory)), levels = c("Full DM1", "Amino acids", "Vitamins", "Minerals", "Others"))]
model_fits_compare[,FracTMid:=t_mid/V7]
model_fits_compare[,FracK:=k/V3]
model_fits_compare[,FracR:=r/V1]
setnames(model_fits_compare, c(paste0("V", 1:14)), paste0("base_", c("r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd",
"tmid_mean", "tmid_sd", "tgen_mean", "tgen_sd", "aucL_mean", "aucL_sd", "aucE_mean", "aucE_sd")))
corrected_data_sub <- merge(corrected_data_all[Strain == "2243" & (grepl("LOO", Experiment)|
Experiment %in% c("AgmatineCombinations", "DM1OldB12Old")|
(Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil"))|
(Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0")))], unique(condition_info[!is.na(LOO)]), by = c("Experiment", "Condition"), all.x = F)
loo_colors <- condition_info[!is.na(LOO)][order(LOOCategory, LOO)][,unique(LOO)]
average_dat_sub[,LOO:=factor(LOO, levels = loo_colors)]
average_dat_sub[,Group2:=ifelse(LOO != "DM1", as.character(LOOCategory), Experiment)]
average_dat_sub[LOO == "DM1" & (grepl("Vitamin", Experiment)|grepl("B12", Experiment)|grepl("Agma", Experiment)), Group2:="Vitamin dropouts"]
average_dat_sub[LOO == "DM1" & (grepl("Mineral", Experiment)) , Group2:="Mineral dropouts"]
average_dat_sub[LOO == "DM1" & (grepl("Amino", Experiment)) , Group2:="Amino acid dropouts"]
average_dat_sub[LOO == "DM1" & (grepl("Ac", Experiment)|grepl("Urac", Experiment)) , Group2:="Other dropouts"]
average_dat_sub <- average_dat_sub[!(Experiment == "AminoAcidLOOs" & LOO=="DM1" & Group2=="Other dropouts")]
average_dat_sub[,Group2:=factor(Group2, levels = c("Amino acid dropouts", "Vitamin dropouts", "Mineral dropouts", "Other dropouts"))]
### Plots
## Get summary parameters
category_colors <- c(brewer.pal(8, "Dark2")[c(1,4,5, 3)])
category_colors <- c(brewer.pal(8, "Set3")[c(4,5,6,3)])
names(category_colors) <- c("Amino acids", "Vitamins", "Minerals", "Others")
category_colors2 <- category_colors
names(category_colors2)[1] <- "Amino acids &\nother carbon sources"
model_fits_compare_good2 <- model_fits_compare[!is.na(LOO) & LOOCategory != "Full DM1" &
Experiment != "VitaminLOOs2" & Strain.x=="2243"]
model_fits_compare_good2 <- model_fits_compare_good2[(sigma < 0.15|r_se > 10) & base_k_mean < 1] ## Bad fits, don't lose any LOO data
fwrite(model_fits_compare_good2, file = paste0(outdir, "growth_curve_resultsAllSummaryParams_LOO.txt"), sep = "\t")
model_fits_compare_good2b <- melt(model_fits_compare_good2, id.var = c("LOOCategory", "LOO", "Sample", "Condition", "Experiment"),
measure.vars = c("auc_e", "t_mid", "k", "r"))
model_fits_compare_good2b[,LOOCategory2:=gsub(" dropouts", "s", as.character(LOOCategory))]
median_fracs2b <- model_fits_compare_good2b[Experiment !="VitaminLOOs2" & value < 100,median(value),
by = list(LOOCategory, LOOCategory2, LOO, variable)]
median_fracs2b[, LOOCategory3:=ifelse(LOOCategory2 %in% c("Others", "Amino acids"), "Amino acids &\nother carbon sources", LOOCategory2)]
median_fracs2b[,LOOCategory3:=factor(LOOCategory3, levels = names(category_colors2[1:3]))]
base_means <- unique(model_fits_compare_good2[,list(Experiment, base_aucE_mean, base_tmid_mean, base_k_mean, base_r_mean)])
base_means <- data.table(variable = c("auc_e", "t_mid", "k", "r"), baseVal = c(median(base_means$base_aucE_mean), median(base_means$base_tmid_mean), median(base_means$base_k_mean), median(base_means$base_r_mean)))
parameter_hists2 <- ggplot(median_fracs2b, aes(x = V1, fill = LOOCategory3))+
theme(strip.background = element_blank(), panel.border = element_rect(color = "black", fill = NA)) +
geom_vline(data = base_means, aes(xintercept = baseVal), linetype = 2) + geom_histogram(bins = 15) +
facet_grid(LOOCategory3 ~ variable, scales = "free") +
scale_fill_manual(values = category_colors2[1:3], name = "", guide = "none") + xlab("") + ylab("Number of compounds")
ggsave(parameter_hists2, file = paste0(outdir, "growth_parameter_histograms_clas_absVal.pdf"), width = 6.3, height = 4.5)
parameter_hists2### Diff abund tests
base_models_sub <- base_models[BadFit==0 & Strain == "2243" & Experiment %in% model_fits_compare_good2b[,Experiment]]
unique_loos <- model_fits_compare_good2[Experiment != "VitaminLOOs2",unique(LOO)]
unique_loo_r_pvals <- sapply(unique_loos, function(x){
exp1 <- model_fits_compare_good2[LOO==x]
base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
foo <- wilcox.test(exp1$r, base_exp$r)$p.value
return(foo)
})
unique_loo_k_pvals <- sapply(unique_loos, function(x){
exp1 <- model_fits_compare_good2[LOO==x]
base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
foo <- wilcox.test(exp1$k, base_exp$k)$p.value
return(foo)
})
unique_loo_tmid_pvals <- sapply(unique_loos, function(x){
exp1 <- model_fits_compare_good2[LOO==x]
base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
foo <- wilcox.test(exp1$t_mid, base_exp$t_mid)$p.value
return(foo)
})
unique_loo_auc_pvals <- sapply(unique_loos, function(x){
exp1 <- model_fits_compare_good2[LOO==x]
base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
foo <- wilcox.test(exp1$auc_e, base_exp$auc_e)$p.value
return(foo)
})
diff_results <- data.table(LOO = unique_loos, r = unique_loo_r_pvals, k = unique_loo_k_pvals, tmid = unique_loo_tmid_pvals, auc = unique_loo_auc_pvals)
diff_results[,r_padj:=p.adjust(r, method = "BH")]
diff_results[,k_padj:=p.adjust(k, method = "BH")]
diff_results[,tmid_padj:=p.adjust(tmid, method = "BH")]
diff_results[,auc_padj:=p.adjust(auc, method = "BH")]
diff_results[,table(r_padj < 0.1|k_padj < 0.1|tmid_padj < 0.1|auc_padj < 0.1)]##
## FALSE TRUE
## 24 17
fwrite(diff_results, file = paste0(outdir, "loo_diffAbundWilcoxonGrowthParamsVsBase.csv"))
#Summary by compound
median_fracs <- model_fits_compare_good2b[Experiment !="VitaminLOOs2",median(value), by = list(LOOCategory, LOOCategory2, LOO, variable)]
median_fracs[, LOOCategory3:=ifelse(LOOCategory2 %in% c("Others", "Amino acids"), "Amino acids &\nother carbon sources", LOOCategory2)]
median_fracs[,LOOCategory3:=factor(LOOCategory3, levels = names(category_colors2[1:3]))]
median_fracs[,log2Effect:=log2(V1)]
median_fracs[log2Effect > 0.5 | log2Effect < -1, unique(LOO)]## [1] Uracil Acetate B12 Arg
## [5] His Ile Leu Lys
## [9] Met Phe Pro Ser
## [13] Thr Trp Tyr Val
## [17] Ca Co Cu Fe
## [21] H3BO3 HCO3 Mg Mn
## [25] Mo NO3 Ni WO4
## [29] Zn Ascorbic acid Folate Hemin
## [33] Menadione NAD+ p-aminobenzoate Pantothenate
## [37] Pyridoxamine Riboflavin Thiamine Cys
## [41] Biotin
## 42 Levels: Mg Trp Arg Cys Riboflavin Biotin Met Acetate NAD+ Menadione ... WO4
median_fracs_wide <- dcast(median_fracs, LOOCategory+LOOCategory2+LOO~variable, value.var = "V1")
comp_subset <- unique(model_fits_compare_good2b[, list(LOO, Experiment)])
comp_subset <- comp_subset[!Experiment %in% c("AminoAcidLOOs", "VitaminLOOs2")]
comp_subset <- comp_subset[!(Experiment == "VitaminLOOs" & LOO %in% comp_subset[Experiment != "VitaminLOOs", unique(LOO)])]
model_summary2 <- model_fits_compare_good2[!is.na(LOO),list(length(unique(Experiment)), length(Sample), 1/mean(sapply(r[!is.na(r) & r != 0], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0),
mean(sigma), sd(sigma), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e)), by = LOO]
setnames(model_summary2, c("LOO", "Number of Experiments", "Total number of replicates", "r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd",
"sigma_mean", "sigma_sd", "auc_l_mean", "auc_l_sd", "auc_e_mean", "auc_e_sd"))
base_summary <- data.table(LOO = "EDM1", base_models_sub[,list(length(unique(Experiment)), length(Sample), 1/mean(sapply(r[!is.na(r) & r != 0], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0),
mean(sigma), sd(sigma), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e))])
setnames(base_summary, c("LOO", "Number of Experiments", "Total number of replicates", "r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd",
"sigma_mean", "sigma_sd", "auc_l_mean", "auc_l_sd", "auc_e_mean", "auc_e_sd"))
model_summary2 <- rbind(base_summary, model_summary2, fill = T)
fwrite(model_summary2, file = paste0(outdir, "suppTable_loo_growth_summary.tsv"), quote=F, sep= "\t")
comp_subset2 <- sort(unique(comp_subset[,LOO]))
average_dat_sub2 <- merge(average_dat_sub, comp_subset, by = c("LOO", "Experiment"), all.x = F)
average_dat_sub2 <- rbind(average_dat_sub2, average_dat_sub[LOO=="DM1" & Experiment %in% average_dat_sub2[,unique(Experiment)]], fill = T)
## Split it up for each compound
average_dat3 <- data.table()
for(j in 1:nrow(comp_subset)){
dat_sub <- average_dat_sub2[Experiment == comp_subset[j, Experiment] & (LOO=="DM1" | LOO == comp_subset[j, LOO])]
dat_sub[,LOOGroup:=comp_subset[j, LOO]]
average_dat3 <- rbind(average_dat3, dat_sub)
}
average_dat3 <- merge(average_dat3, unique(model_fits_compare_good2b[,list(LOO, LOOCategory2)]), by.x = "LOOGroup", by.y = "LOO", all.x = T)
average_dat3[,ColorCol:=ifelse(LOO=="DM1", "EDM1", as.character(LOOCategory2))]
color_set <- c(category_colors, "grey50")
names(color_set)[5] <- "EDM1"
average_dat3[,LOOCategory2:=factor(LOOCategory2, levels = c("Amino acids", "Vitamins", "Minerals", "Others"))]
effect_ranking <- median_fracs_wide[order(auc_e), LOO]
average_dat3 <- average_dat3[!(LOOGroup=="B12" & Experiment %in% c("DM1OldB12Old", "VitaminLOOs3"))]
average_dat3[,OrderedLOO:=factor(LOOGroup, levels = effect_ranking)]
comp_order <- unique(average_dat3[order(LOOCategory2), LOOGroup])
facet_effect_plot <- ggplot(average_dat3[Condition2 != "DM1_v2"], aes(x = time, ymin = V1-V2, ymax = V1+V2, fill = ColorCol, group = Condition2)) + geom_ribbon(alpha = 0.4) + geom_line(aes(y = V1, color = ColorCol)) +
ylab("E. lenta DSM 2243 abundance (normalized OD600)") + xlab("Time (hr)") + facet_wrap(~factor(LOOGroup, levels = comp_order), ncol = 7) + theme_bw() + scale_color_manual(values = color_set, name = "Removed compound class") + scale_fill_manual(values = color_set, name = "Removed compound class") + theme(strip.background= element_blank(), panel.grid = element_blank()) + xlim(0, 78)
ggsave(facet_effect_plot, file = paste0(outdir, "allGrowthCurveLOOs.pdf"), width = 7.3, height = 6)
fwrite(average_dat3, file = paste0(outdir, "finalAvgLOOgrowthdata.csv"))
facet_effect_plotall_growth_data <- corrected_data_all[(Strain %in% c("2243", "c") & (Experiment != "VitaminLOOs2" & (grepl("LOO", Experiment))|
Experiment %in% c("DM1OldB12Old")|
(Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil")|
Experiment == "AddCarboxylicAcids")) |(Experiment == "AgmArgCitrl" & !grepl("Base2", Condition2)))|Experiment == "PantothenateStrains"|Experiment=="AcetateDosesStrains"]
fwrite(all_growth_data, file = paste0(outdir, "growthDataPaperAll.txt"), sep = "\t")rm(list = ls())
library(data.table)
library(ggplot2)
library(cowplot)
library(readxl)
library(tidyverse)
library(webchem)
library(vegan)
library(RColorBrewer)
library(ggpubr)
library(ComplexHeatmap)
library(santaR)##
## This is santaR version 1.2.3
library(ggrepel)
theme_set(theme_classic2())
datadir <- ""
outdir <- "figure2/"
dir.create(outdir) # Gives a warning if it already exists
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] santaR_1.2.3 lubridate_1.8.0 ggbeeswarm_0.6.0
## [4] growthcurver_0.3.1 KEGGREST_1.32.0 patchwork_1.1.1
## [7] ggrepel_0.9.1 ComplexHeatmap_2.13.1 ggpubr_0.4.0
## [10] RColorBrewer_1.1-2 fastcluster_1.2.3 vegan_2.6-2
## [13] lattice_0.20-45 permute_0.9-5 webchem_1.1.2
## [16] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [19] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [22] tibble_3.1.7 tidyverse_1.3.1 readxl_1.3.1
## [25] cowplot_1.1.1 ggplot2_3.3.6 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.34.0
## [7] GlobalOptions_0.1.2 fs_1.5.0 clue_0.3-61
## [10] rstudioapi_0.13 farver_2.1.0 fansi_1.0.3
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.1.1
## [16] doParallel_1.0.17 knitr_1.39 jsonlite_1.8.0
## [19] Cairo_1.6-0 broom_0.8.0 cluster_2.1.3
## [22] dbplyr_2.1.1 png_0.1-7 shiny_1.6.0
## [25] data.tree_1.0.0 compiler_4.1.1 httr_1.4.2
## [28] backports_1.2.1 assertthat_0.2.1 Matrix_1.4-1
## [31] fastmap_1.1.0 cli_3.3.0 later_1.3.0
## [34] htmltools_0.5.2 tools_4.1.1 gtable_0.3.0
## [37] glue_1.6.2 GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3
## [40] carData_3.0-5 cellranger_1.1.0 jquerylib_0.1.4
## [43] vctrs_0.4.1 Biostrings_2.62.0 nlme_3.1-159
## [46] iterators_1.0.13 xfun_0.31 rvest_1.0.2
## [49] mime_0.12 lifecycle_1.0.0 rstatix_0.7.0
## [52] zlibbioc_1.40.0 MASS_7.3-57 scales_1.1.1
## [55] promises_1.2.0.1 hms_1.1.1 parallel_4.1.1
## [58] yaml_2.3.5 sass_0.4.1 stringi_1.7.6
## [61] highr_0.9 S4Vectors_0.32.4 foreach_1.5.2
## [64] BiocGenerics_0.40.0 shape_1.4.6 GenomeInfoDb_1.30.1
## [67] rlang_1.0.2 pkgconfig_2.0.3 matrixStats_0.62.0
## [70] bitops_1.0-7 evaluate_0.15 labeling_0.4.2
## [73] tidyselect_1.1.2 magrittr_2.0.3 R6_2.5.1
## [76] magick_2.7.3 IRanges_2.28.0 generics_0.1.0
## [79] DBI_1.1.1 pillar_1.7.0 haven_2.5.0
## [82] withr_2.5.0 mgcv_1.8-40 abind_1.4-5
## [85] RCurl_1.98-1.7 modelr_0.1.8 crayon_1.4.1
## [88] car_3.0-13 utf8_1.2.2 tzdb_0.3.0
## [91] rmarkdown_2.14 GetoptLong_1.0.5 minpack.lm_1.2-1
## [94] reprex_2.0.1 digest_0.6.29 xtable_1.8-4
## [97] httpuv_1.6.5 stats4_4.1.1 munsell_0.5.0
## [100] beeswarm_0.4.0 vipor_0.4.5 bslib_0.3.1
processed_data <- fread("processedDatasets/timecourse_all_data.csv")
summary_data <- fread("processedDatasets/timecourse_mean_summaries.csv")
growth_file = "../TimeCourse_take2_2020-8-17/ODdata.xlsx"
growth_data = read_xlsx(growth_file, sheet = 1)
time_pts = read_xlsx(growth_file, sheet = 2)
time_pts = data.table(time_pts %>% mutate(TimePoint = as.numeric(gsub("TP", "", TimePoint))))
growth_data = data.table(growth_data) %>% melt(id.var = c("Strain", "AcConc", "Replicate"))
growth_data[,Time:=as.numeric(gsub(".*_", "", variable))]
growth2 = growth_data
setnames(growth2, "value", "OD600")
growth2[Time==41.5, Time:=42]
growth2[Time==26.5, Time:=26]
growth2[Time==30.5, Time:=30]
mean_growth2 = growth2[, .(meanOD =mean(OD600), sdOD = sd(OD600)), by=list(Strain, AcConc, Time)]
mean_data2 = processed_data[Sample != "c_10_TP1_1_R1",list(meanValue = mean(value), SD_Value = sd(value)), by=list(Time, TimePoint, AcConc, Strain, IonMode, IDUniq, AlignmentID, MetName, MSI)]
mean_data2[,Strain:=factor(Strain, levels = c("2243", "AB8", "Val", "Ctrl"))]
## Which mets are significantly diff with acetate 0 vs 1?
santaR_metadata <- unique(processed_data[Strain == "2243" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")
santaR_data <- column_to_rownames(dcast(processed_data[Strain == "2243" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time,
group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)## Input data generated: 6.95 secs
## Spline fitted: 55.95 secs
## p-val dist done: 9.43 mins
## total time: 10.48 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features3 <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]
#Must also be signif diff from ctrls at final time point in at least 1 group
variable_features3 <- variable_features3[variable_features3 %in% summary_data[FDRCorrect < 0.2 & !is.na(FDRCorrect) & TimePoint==6 & Strain == 2243, unique(IDUniq)]]
processed_data[,ZScoreLog:=scale(log10value), by=list(IDUniq, Strain)]
mean_dat_plot <- processed_data[Strain %in% c("2243", "AB8", "Val") & AcConc %in% c(0,1, 10) & IDUniq %in% variable_features3 & Duplicate == 0,
mean(ZScoreLog), by = list(Strain, AcConc, IDUniq, MetName, MSI,Time, CF_superclass)]
mean_dat_plot[,DuplicateID:=length(unique(IDUniq)), by=MetName]
feat_list <- mean_dat_plot[!MetName %in% c("Unknown", "unknown") & Strain == 2243 & !(DuplicateID==2 & grepl("Pos", IDUniq)), unique(IDUniq)]
variable_feat_order_sub <- variable_features3[variable_features3 %in% feat_list]
label_names <- sapply(variable_feat_order_sub, function(x){ return(mean_dat_plot[IDUniq==x, MetName][1])})
mean_dat_plot_wide <- dcast(mean_dat_plot[IDUniq %in% variable_feat_order_sub & Strain == 2243], IDUniq~AcConc+Time, value.var = "V1")
label_names <- sapply(mean_dat_plot_wide[,IDUniq], function(x){ return(mean_dat_plot[IDUniq==x, paste0(MetName, " (", MSI, ")")][1])})
Heatmap(mean_dat_plot_wide[,2:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)],
row_labels = label_names, row_names_gp = gpar(fontsize = 7), column_split = factor(c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)), levels = c("0mM", "1mM", "10mM")),
cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 3)),
column_names_rot = 0, column_names_gp = gpar(fontsize = 8))pdf(paste0(outdir, "acetate_heatmap_vX.pdf"), width = 7, height = 5.6)
Heatmap(mean_dat_plot_wide[,2:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)],
row_labels = label_names, row_names_gp = gpar(fontsize = 7), column_split = factor(c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)), levels = c("0mM", "1mM", "10mM")),
cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 3)),
column_names_rot = 0, column_names_gp = gpar(fontsize = 8))
dev.off()## png
## 2
############ Growth of all 3 strains
mean_growth <- growth2[,list(mean(OD600), sd(OD600)/sqrt(length(OD600))), by = list(Time, AcConc, Strain)]
growth_all <- ggplot(mean_growth[AcConc != "0Add10"], aes(x = Time, y = V1, color = AcConc)) + geom_line() + geom_errorbar(width = 0, aes(ymin = V1-V2, ymax = V1+V2)) + geom_point() + facet_wrap(~Strain, nrow = 3) + scale_color_manual(values = brewer.pal(5, "Blues")[3:5]) + ylab("OD600") + xlab("Time (hr)")
ggsave(growth_all, file = paste0(outdir, "acetateExperimentGrowth.pdf"), width = 3.5, height = 6)
growth_all############## AB8 and Valencia SantaR
## Which mets are significantly diff with acetate 0 vs 1 for each of these strains?
santaR_metadata <- unique(processed_data[Strain == "AB8" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")
santaR_data <- column_to_rownames(dcast(processed_data[Strain == "AB8" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_AB8_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time,
group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)## Input data generated: 7.56 secs
## Spline fitted: 1.01 mins
## p-val dist done: 9.46 mins
## total time: 10.6 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results_AB8.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features_ab8 <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]
###### Valencia
santaR_metadata <- unique(processed_data[Strain == "Val" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")
santaR_data <- column_to_rownames(dcast(processed_data[Strain == "Val" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_Val_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time,
group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)## Input data generated: 9.39 secs
## Spline fitted: 56.62 secs
## p-val dist done: 9.51 mins
## total time: 10.61 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results_Val.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features_val <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]
############################ Heatmap all strains (Figure S3)
variable_feat_all <- sort(unique(c(variable_features3, variable_features_ab8, variable_features_val)))
mean_dat_plot <- processed_data[Strain %in% c("2243", "AB8", "Val") & AcConc %in% c(0,1, 10) & IDUniq %in% variable_feat_all & Duplicate == 0,
mean(ZScoreLog), by = list(Strain, AcConc, IDUniq, MetName, MSI,Time, CF_superclass)]
mean_dat_plot[,DuplicateID:=length(unique(IDUniq)), by=MetName]
feat_list <- mean_dat_plot[!MetName %in% c("Unknown", "unknown") & Strain == 2243 & !(DuplicateID==2 & grepl("Pos", IDUniq)), unique(IDUniq)]
variable_feat_order_sub <- variable_feat_all[variable_feat_all %in% feat_list]
label_names <- sapply(variable_feat_order_sub, function(x){ return(mean_dat_plot[IDUniq==x, MetName][1])})
mean_dat_plot_wide <- dcast(mean_dat_plot[IDUniq %in% variable_feat_order_sub], IDUniq~Strain+AcConc+Time, value.var = "V1")
label_names <- sapply(mean_dat_plot_wide[,IDUniq], function(x){ return(mean_dat_plot[IDUniq==x, paste0(MetName, " (", MSI, ")")][1])})
Heatmap(mean_dat_plot_wide[,20:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)],
row_labels = label_names, row_names_gp = gpar(fontsize = 7),
column_split = factor(c(
paste0("AB8n2 ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6))),
paste0("Valencia ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)))),
levels = c("AB8n2 0mM", "AB8n2 1mM",
"AB8n2 10mM", "Valencia 0mM", "Valencia 1mM", "Valencia 10mM")),
cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 6)),
column_names_rot = 0, column_names_gp = gpar(fontsize = 7), column_title_gp = gpar(fontsize = 10))pdf(paste0(outdir, "acetate_heatmap_vX_OtherStrains.pdf"), width = 8, height = 5.8)
Heatmap(mean_dat_plot_wide[,20:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)],
row_labels = label_names, row_names_gp = gpar(fontsize = 7),
column_split = factor(c(
paste0("AB8n2 ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6))),
paste0("Valencia ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)))),
levels = c("AB8n2 0mM", "AB8n2 1mM",
"AB8n2 10mM", "Valencia 0mM", "Valencia 1mM", "Valencia 10mM")),
cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 6)),
column_names_rot = 0, column_names_gp = gpar(fontsize = 7), column_title_gp = gpar(fontsize = 10))
dev.off()## png
## 2
rm(list = ls())
library(data.table)
library(tidyverse)
library(readxl)
library(paletteer)
library(RColorBrewer)
library(ggpubr)
theme_set(theme_classic2())
################ Intracellular
datadir <- "../SILAcetateTimeCourse/"
outdir <- "figure2/"
dir.create(outdir)
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] paletteer_1.4.0 santaR_1.2.3 lubridate_1.8.0
## [4] ggbeeswarm_0.6.0 growthcurver_0.3.1 KEGGREST_1.32.0
## [7] patchwork_1.1.1 ggrepel_0.9.1 ComplexHeatmap_2.13.1
## [10] ggpubr_0.4.0 RColorBrewer_1.1-2 fastcluster_1.2.3
## [13] vegan_2.6-2 lattice_0.20-45 permute_0.9-5
## [16] webchem_1.1.2 forcats_0.5.1 stringr_1.4.0
## [19] dplyr_1.0.7 purrr_0.3.4 readr_2.1.2
## [22] tidyr_1.2.0 tibble_3.1.7 tidyverse_1.3.1
## [25] readxl_1.3.1 cowplot_1.1.1 ggplot2_3.3.6
## [28] data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.34.0
## [7] GlobalOptions_0.1.2 fs_1.5.0 clue_0.3-61
## [10] rstudioapi_0.13 farver_2.1.0 fansi_1.0.3
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.1.1
## [16] doParallel_1.0.17 knitr_1.39 jsonlite_1.8.0
## [19] Cairo_1.6-0 broom_0.8.0 cluster_2.1.3
## [22] dbplyr_2.1.1 png_0.1-7 shiny_1.6.0
## [25] data.tree_1.0.0 compiler_4.1.1 httr_1.4.2
## [28] backports_1.2.1 assertthat_0.2.1 Matrix_1.4-1
## [31] fastmap_1.1.0 cli_3.3.0 later_1.3.0
## [34] htmltools_0.5.2 tools_4.1.1 gtable_0.3.0
## [37] glue_1.6.2 GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3
## [40] carData_3.0-5 cellranger_1.1.0 jquerylib_0.1.4
## [43] vctrs_0.4.1 Biostrings_2.62.0 nlme_3.1-159
## [46] iterators_1.0.13 xfun_0.31 rvest_1.0.2
## [49] mime_0.12 lifecycle_1.0.0 rstatix_0.7.0
## [52] zlibbioc_1.40.0 MASS_7.3-57 scales_1.1.1
## [55] promises_1.2.0.1 hms_1.1.1 parallel_4.1.1
## [58] rematch2_2.1.2 yaml_2.3.5 sass_0.4.1
## [61] stringi_1.7.6 highr_0.9 S4Vectors_0.32.4
## [64] foreach_1.5.2 BiocGenerics_0.40.0 shape_1.4.6
## [67] GenomeInfoDb_1.30.1 rlang_1.0.2 pkgconfig_2.0.3
## [70] matrixStats_0.62.0 bitops_1.0-7 evaluate_0.15
## [73] labeling_0.4.2 tidyselect_1.1.2 magrittr_2.0.3
## [76] R6_2.5.1 magick_2.7.3 IRanges_2.28.0
## [79] generics_0.1.0 DBI_1.1.1 pillar_1.7.0
## [82] haven_2.5.0 withr_2.5.0 mgcv_1.8-40
## [85] abind_1.4-5 RCurl_1.98-1.7 modelr_0.1.8
## [88] crayon_1.4.1 car_3.0-13 utf8_1.2.2
## [91] tzdb_0.3.0 rmarkdown_2.14 GetoptLong_1.0.5
## [94] minpack.lm_1.2-1 reprex_2.0.1 digest_0.6.29
## [97] xtable_1.8-4 httpuv_1.6.5 stats4_4.1.1
## [100] munsell_0.5.0 beeswarm_0.4.0 vipor_0.4.5
## [103] bslib_0.3.1
##### Set up
lower_level_intra_data <- readRDS(paste0(datadir, "IntracellularExtracts/LowLevelDataAllProcessed.rds"))
bad_feat3 <- lower_level_intra_data[AreaFrac > 1000 & NumLabeledC > 0, sum(Status == "Contaminating mass"), by = FeatID] ##
lower_level_intra_data[,BadFeat3:=ifelse(FeatID %in% bad_feat3[V1 > 11, FeatID], 1, 0)] ## contaminating in > 5% of
lower_level_intra_data <- lower_level_intra_data[BadFeat3==0]
lower_level_intra_data <- lower_level_intra_data[is.na(AlwaysZero)]
feat_wide <- dcast(lower_level_intra_data, Name+MetID+Formula+Tags.y+AnnotationMolWt+RT+NumLabeledC+TotNumC+Mode+FracLabeled+CompID+FeatID~Sample2, value.var = "AreaFrac", fill = 0)
lower_level_data_long <- melt(feat_wide, id.vars = names(feat_wide)[1:12], variable.name = "Sample", value.name = "AreaFrac")
lower_level_data_long[,c("Strain", "AcetateGroup", "Replicate", "TimePoint"):=tstrsplit(Sample, split = "_")]
lower_level_data_long[,TimePoint:=case_when(
TimePoint=="TP4i" ~ "TP3",
TimePoint=="TP6i" ~ "TP5"
)]
lower_level_data_long[,AcetateGroup:=gsub("Ac", "", AcetateGroup)]
# Avg and SE across replicates
summary_tab2 <- lower_level_data_long[TimePoint == "TP5",list(mean(AreaFrac), median(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by=list(AcetateGroup, Strain, CompID, Name, Tags.y, NumLabeledC, FeatID, AnnotationMolWt, Formula)]
summary_tab2[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]
# Tot signal from labeled vs unlabeled
tot_labeled_unlabeled <- summary_tab2[,sum(V1), by=list(AcetateGroup, Strain, CompID, Name, Tags.y, NumLabeledC==0)]
summary_tab <- dcast(tot_labeled_unlabeled, AcetateGroup+Strain+CompID+Name+Tags.y~NumLabeledC, value.var = "V1")
setnames(summary_tab, c("TRUE", "FALSE"), c("Unlabeled", "Labeled"))
summary_tab[,sum(Labeled > 1000), by=CompID]## CompID
## 1: Neg_2-Hydroxy-4-methylpentanoic acid_[M-H]-_LVRFTAZAXQPQHI-UHFFFAOYSA-N_132.0785_1.854
## 2: Neg_3-Phenyllactic acid_166.0628_2.249
## 3: Neg_5-Aminovaleric acid_117.0787_7.79
## 4: Neg_Azelaic acid_188.1046_1.448
## 5: Neg_Citraconic acid_130.0265_1.682
## ---
## 177: Pos_Unknown_337.33446_338.3414_12.028
## 178: Pos_Unknown_347.15551_348.1662_7.328
## 179: Pos_Unknown_360.19367_361.1981_8.667
## 180: Pos_Valine_117.07898_118.086_7.78
## 181: Pos_[Alanine_[M+H]+_QNAYBMKLOCPYGJ-REOHCLBHSA-N]_89.04768_90.0549_8.107
## V1
## 1: 7
## 2: 11
## 3: 18
## 4: 3
## 5: 11
## ---
## 177: 12
## 178: 9
## 179: 5
## 180: NA
## 181: 6
summary_tab[is.na(Labeled), Labeled:=0]
summary_tab[is.na(Unlabeled), Unlabeled:=0]
unlabeled_comps <- summary_tab[Labeled < 1000 & Unlabeled > 10000 & AcetateGroup %in% c("1L", "10L") & Strain != "c", unique(CompID)]
unlabeled_comps2 <- summary_tab[Unlabeled > 2*Labeled & AcetateGroup %in% c("1L", "10L") & Strain != "c" , unique(CompID)]
ctrl_levels <- summary_tab[Strain == "c", list(mean(Labeled), mean(Unlabeled)), by=list(CompID, AcetateGroup)]
setnames(ctrl_levels, c("V1", "V2"), c("ctrlLabeled", "ctrlUnlabeled"))
summary_tab <- merge(summary_tab, ctrl_levels, by = c("CompID", "AcetateGroup"), all.x = T)
unlabeled_comps3 <- summary_tab[Labeled < 1000 & Unlabeled > 10000 & AcetateGroup %in% c("1L", "10L") & Strain != "c" & (Labeled+Unlabeled)/(ctrlLabeled+ctrlUnlabeled) > 1.3, unique(CompID)]
#### Define compounds to plot
top_comps2 <- summary_tab[Strain == "2243" & AcetateGroup %in% c("1L", "10L") &
Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]
bad_comps <- summary_tab[Strain != "c" & !AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.5 , unique(CompID)]
top_comps2 <- top_comps2[!top_comps2 %in% bad_comps]
#What labeled comps are added by other species?
top_comps2_otherspec <- summary_tab[Strain %in% c("AB8n2", "Valencia") & AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]
top_comps2_otherspec <- top_comps2_otherspec[!top_comps2_otherspec %in% bad_comps]
length(top_comps2_otherspec[!top_comps2_otherspec %in% top_comps2])## [1] 4
top_comps_ab8 <- summary_tab[Strain == "AB8n2" & AcetateGroup %in% c("1L", "10L") &
Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]
top_comps_ab8 <- top_comps_ab8[!top_comps_ab8 %in% bad_comps]
top_comps_val <- summary_tab[Strain == "Valencia" & AcetateGroup %in% c("1L", "10L") &
Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]
top_comps_val <- top_comps_val[!top_comps_val %in% bad_comps]
top_data_intra <- summary_tab2[CompID %in% top_comps2 & AcetateGroup %in% c("1L", "10L")]
top_data_intra[,Class:=case_when(
grepl("NAD", CompID) ~ "Vitamins and cofactors",
grepl("UDP", CompID) ~ "Other",
grepl("Thymidine", CompID) ~ "Nucleic acids",
grepl("Acetyl", CompID) ~ "Other",
grepl("Glucose", CompID) ~ "Carbohydrates",
grepl("Orotic", CompID) ~ "Nucleic acids",
grepl("Gluta", CompID) ~ "Peptides and amino acids",
grepl("Alanine", CompID) ~ "Peptides and amino acids"
)]
top_data_intra[,Name2:=gsub("^[A-Za-z]+_", "", CompID)]
comp_order <- unique(top_data_intra[,list(Name2, Class)])[order(Class), Name2]
color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
names(color_scale2) <- top_data_intra[Strain %in% c("c", "2243") & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)),sort(unique(NumLabeledC2))]
intracellular_plot5 <- ggplot(top_data_intra[Strain %in% c("2243") & !grepl("known", CompID) & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)) ],
aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) +
geom_bar(stat = "identity", position = "fill", color = "black") + facet_wrap(factor(Name2, levels = comp_order)~., ncol = 9) +
theme_classic2() + theme(strip.text = element_text(angle =0, size = 9, hjust = 0, face = "bold"), axis.text = element_text(size = 7), strip.background = element_blank(), legend.position = "bottom")+
scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
ylab("Share of Peak Area")
ggsave(intracellular_plot5, file = paste0(outdir, "intracellular_sil_2243_ID_summary_2rows.pdf"), width = 8.5, height = 2.6)
intracellular_plot5summary_tab2[CompID %in% top_comps2 & Strain %in% c("c", "2243") & grepl("known", CompID) & AcetateGroup %in% c("1L", "10L") , length(unique(CompID))]## [1] 7
###### What frac of UDP is labeled?
summary_tab[grepl("UDP", CompID) & Strain == "2243" & AcetateGroup %in% c("1L", "10L"), mean(Labeled/(Unlabeled+Labeled)), by = AcetateGroup]## AcetateGroup V1
## 1: 10L 1.0000000
## 2: 1L 0.9758363
### Combined AB8, Val, 2243 plot
combined_dat <- summary_tab2[AcetateGroup %in% c("1L", "10L")& Strain != "c" & CompID %in% c(top_comps2, top_comps_ab8, top_comps_val) & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)) ]
combined_dat[,CompLab:=gsub("Labeled ", "", CompID)]
combined_dat[grepl("Alanine", CompID), CompLab:="Pos_Alanine_89.04768_90.0549_8.107"]
combined_dat[,CompLab:=ifelse(grepl("Neg", CompID), paste0(gsub("Neg_", "", CompLab), "_Neg"),
paste0(gsub("Pos_", "", CompLab), "_Pos"))]
combined_dat <- combined_dat[!CompLab %in% c("Unknown_129.0424_8.79_Neg" , "Unknown_129.04259_130.0498_8.662_Pos")] ## Redundant
intracellular_3strain <- ggplot(combined_dat,
aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) +
geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~CompLab, switch = "y") +
theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
ylab("Share of Peak Area")
ggsave(intracellular_3strain, file = paste0(outdir, "intracellular_sil_3strains.pdf"), width = 9, height = 4.5)
combined_dat[,MSI:=gsub(";", "", gsub("Labeled", "", Tags.y))]
combined_dat[MSI == "NA", MSI:=""]
intracellular_3strain_id <- ggplot(combined_dat[!grepl("known", CompLab)],
aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) +
geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~paste0(Name, " (", MSI, ")"), switch = "y") +
theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
ylab("Share of Peak Area")
ggsave(intracellular_3strain_id, file = paste0(outdir, "intracellular_sil_3strains_id.pdf"), width = 7, height = 4)
intracellular_3strain_idintracellular_3strain_unknown <- ggplot(combined_dat[grepl("known", CompLab)],
aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) +
geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~CompLab, switch = "y") +
theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(),
legend.position = "bottom") +
scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
ylab("Share of Peak Area")
ggsave(intracellular_3strain_unknown, file = paste0(outdir, "intracellular_sil_3strains_unknown.pdf"), width = 7, height = 4)
intracellular_3strain_unknown#### Supp table
tab_save <- summary_tab2[(CompID %in% c(top_comps2, top_comps_val, top_comps_ab8)) & Strain != "c" & AcetateGroup %in% c("1L", "10L")][V1 != 0]
setnames(tab_save, c("V1", "V3"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, AcetateGroup)]
tab_save[,Strain:=case_when(
Strain == "AB8" ~ "AB8n2",
Strain == "Val" ~ "Valencia",
TRUE ~ Strain
)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]
tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)]], CompID+Tags.y+Formula+Strain ~ paste0("M+", NumLabeledC) + AcetateGroup, value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[3])
})]
tab_save[,RT:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[length(foo)])
})]
tab_save[grepl("Alanine", CompID), MolWt:="89.04768"]
tab_save[,CompID:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(paste0(foo[2], "_", foo[1]))
})]
tab_save <- tab_save[,c("Strain", "CompID", "Tags.y", "MolWt", "Formula", "RT", sort(names(tab_save)[7:ncol(tab_save)])), with=F]
fwrite(tab_save[order(Strain, CompID)], file = paste0(outdir, "intracellular_ac_labeling_summary.tsv"), sep = "\t")
rm(list = ls())########## Extracellular
datadir <- "../SILAcetateTimeCourse/"
datadir2 <- "../"
outdir <- "figure2/"
low_level_data <- readRDS(paste0(datadir2, "LowLevelDataAllProcessedFixedAcetate.rds"))
top_data <- readRDS(paste0(datadir2, "SILprocessedTopLevelDataReplicatesFixedAcetate.rds"))
top_data_mean <- readRDS(paste0(datadir2, "SILprocessedTopLevelDataMeansFixedAcetate.rds"))
low_level_data[,Strain2:=factor(Strain, levels = c("c", "2243", "AB8", "Val"))]
levels(low_level_data$Strain2) <- c("Ctrl", "2243", "AB8n2", "Valencia")
bad_feat2 <- low_level_data[!grepl("L", AcetateGroup) & AreaFrac > 1e6 & NumLabeledC > 0, list(CompID, ExLabel, NumLabeledC, FeatID)][,length(NumLabeledC), by=list(CompID, ExLabel, FeatID)][V1 > 1]
bad_feats <- low_level_data[BadFeat==1,list(CompID, ExLabel, NumLabeledC)][,length(NumLabeledC), by=list(CompID, ExLabel)][V1 > 1]
bad_feats[,BadFeatAll:=1]
most_signal <- low_level_data[TimePoint=="TP8" & Strain != "c",sum(AreaFrac*NumLabeledC),
by = list(Strain, AcetateGroup, TimePoint, Time, CompID)][,max(V1), by=CompID][order(V1, decreasing = T)][1:7, CompID]
color_scale = c("gray", paletteer::paletteer_dynamic("cartography::turquoise.pal", 16))
names(color_scale) = c(seq(0, 15), 17)
color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
low_level_data[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]
# Avg and SE across replicates
summary_tab2ex <- low_level_data[Sample2 != "c_10L_TP8_1",list(mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by=list(AcetateGroup, Strain, CompID, Name, ExLabel, FeatID, TimePoint, Time, MSI, MolWt, Formula)]
summary_tab2ex[,NumLabeledC:=as.numeric(gsub("ExRate", "", ExLabel))]
tot_labeled_unlabeled_ex <- summary_tab2ex[TimePoint=="TP8",sum(V1), by=list(AcetateGroup, Strain, CompID, Name, ExLabel=="ExRate0")]
summary_tab_ex <- dcast(tot_labeled_unlabeled_ex, AcetateGroup+Strain+CompID+Name~ExLabel, value.var = "V1")
setnames(summary_tab_ex, c("TRUE", "FALSE"), c("Unlabeled", "Labeled"))
#### Define top comps
top_comps2ex <- summary_tab_ex[Strain == "2243" & AcetateGroup %in% c( "10L") & Labeled/(Labeled+Unlabeled) > 0.5 &
Labeled > 50000, unique(CompID)]
bad_comps_ex <- summary_tab_ex[Strain != "c" & !AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.5 & Labeled > 1e5, unique(CompID)]
top_comps2ex <- top_comps2ex[!top_comps2ex %in% bad_comps_ex]
top_comps2ex_otherspec <- summary_tab_ex[Strain %in% c("AB8", "Val") & AcetateGroup %in% c( "10L") & Labeled/(Labeled+Unlabeled) > 0.5 & Labeled > 50000, unique(CompID)]
top_comps2ex_otherspec <- top_comps2ex_otherspec[!top_comps2ex_otherspec %in% bad_comps_ex]
length(top_comps2ex_otherspec[!top_comps2ex_otherspec %in% top_comps2ex])## [1] 2
# Get rid of duplicates
top_comps2ex_sub <- top_comps2ex[!top_comps2ex %in% c("ACETYL ARGININE_Neg", "D-(-)-Glutamine_Neg",
"L-Glutamic acid_Neg", "N-Acetylornithine_Neg",
"N-Acetyltryptophan_Neg", "Pantothenic acid_Neg")]
color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
summary_tab2ex[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]
names(color_scale2) <- summary_tab2ex[,sort(unique(NumLabeledC2))]
color_scale_sub <- color_scale2[names(color_scale2) %in% summary_tab2ex[CompID %in% top_comps2ex & Strain == 2243 & AcetateGroup %in% c("10L") & !grepl("myristic", CompID) & !grepl("ASCORBIC", CompID) & V1 != 0, NumLabeledC2]]
summary_tab2ex[CompID=="Acetylarginine_Pos", CompID:="N-Acetyl-arginine_Pos"]
dup_comps <- c("D-(-)-Glutamine_Neg", "N-Acetylornithine_Neg", "L-Glutamic acid_Neg", "N-Acetyltryptophan_Neg", "Pantothenic acid_Neg")
extracellular_plot5 <- ggplot(summary_tab2ex[!CompID %in% dup_comps & (CompID %in% top_comps2ex_sub|CompID %in% top_comps2ex_otherspec)& Strain != "c" & AcetateGroup %in% c("10L") & !grepl("ASCORBIC", CompID) & V1 != 0],
aes(x = factor(paste0(Strain, " ", Time)), y = V1, fill = NumLabeledC2)) +
geom_vline(xintercept = 9.5, size = 0.3, linetype = 2) + geom_vline(xintercept = 18.5, size = 0.3, linetype = 2) +
geom_bar(stat = "identity", position = "stack", color = "black", size = 0.1) + facet_wrap(.~paste0(Name, " (", MSI, ")"), scales = "free_y", ncol = 6) +
theme_classic2() + theme(axis.ticks.x = element_blank(), strip.text = element_text(angle =0, size = 9, hjust = 0),
axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 8, face = "bold"),
strip.background= element_blank(), legend.position = "bottom")+
scale_fill_manual(values = color_scale_sub, guide = guide_legend(), name = "")+ scale_y_continuous(labels = scales::scientific, name = "Mean Peak Area") +
xlab("Time point") +
scale_x_discrete(labels = c("", "", "","", "2243", rep("", 8), "AB8n2", rep("", 8), "Valencia", rep("", 4)))
ggsave(extracellular_plot5, file = paste0(outdir, "extracellular_SIL_summaryIDcomps_allStrains.pdf"), width= 8.5, height = 5)
extracellular_plot5 ##### Extracellular compound summary (supp table)
tab_save <- summary_tab2ex[(CompID %in% top_comps2ex_sub|CompID %in% top_comps2ex_otherspec) & Strain != "c" & TimePoint == "TP8" & AcetateGroup %in% c("1L", "10L")][V1 != 0]
setnames(tab_save, c("V1", "V2"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, AcetateGroup)]
tab_save[,Strain:=case_when(
Strain == "AB8" ~ "AB8n2",
Strain == "Val" ~ "Valencia",
TRUE ~ Strain
)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]
tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)]], CompID+MolWt+Formula+MSI+Strain ~ paste0("M+", NumLabeledC) + AcetateGroup, value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save <- tab_save[,c("Strain", "CompID", "MolWt", "Formula", "MSI", sort(names(tab_save)[4:ncol(tab_save)])), with=F]
fwrite(tab_save[order(Strain, CompID)], file = paste0(outdir, "extracellular_ac_labeling_summary.tsv"), sep = "\t")
###### Count of labeled compounds
low_level_data[,TubeSample2:=paste0(Strain, "_", AcetateGroup2, "_", Replicate)]
low_level_data[,BadCtrl:=ifelse(TubeSample2 %in% c("c_10L_1", "c_1L_3"), 1, 0)]
bad_feat3 <- low_level_data[AreaFrac > 0 & NumLabeledC > 0, sum(Status == "Contaminating mass"), by = FeatID] ##
low_level_data[,BadFeat3:=ifelse(FeatID %in% bad_feat3[V1 > 16, FeatID], 1, 0)] ## contaminating in
## Remove labeled features present at high abund in first time point, not relevant
bad_feat4 <- low_level_data[NumLabeledC > 0 & (Time < 15|(Strain == "c" & Time < 30)) & AreaFrac > 5e5, unique(FeatID)]
#low_level_data <- low_level_data[!FeatID %in% bad_feat4]
labeled_compounds <- low_level_data[BadFeat==0 & BadFeat3 == 0 & BadCtrl == 0 & !FeatID %in% bad_feat4 & NumLabeledC > 0 & !Status %in% c("Contaminating mass", "Low pattern fit"), sum(AreaFrac), by = list(Name, CompID, Strain, AcetateGroup2, TubeSample,TimePoint, Sample, MSI, Formula, Replicate, Time, TotNumC)]
labeled_compounds_count <- labeled_compounds[,length(unique(CompID[V1 > 100000])), by = list(Strain, AcetateGroup2,TimePoint, Time, Replicate)]
ac_group_scale <- brewer.pal(5, "Paired")[c(5, 1:4)]
names(ac_group_scale) <- c("0", "1", "1L", "10", "10L")
labeled_compounds_comp_mean <- labeled_compounds_count[,list(mean(V1), sd(V1)/sqrt(length(V1))), by = list(Strain, AcetateGroup2, TimePoint, Time)]
labeled_compound_plot <- ggplot(labeled_compounds_comp_mean[Strain %in% c(2243, "c")], aes(x = Time, y = V1, color = AcetateGroup2)) + facet_wrap(~ifelse(Strain == "c", "Sterile ctrls", "E. lenta DSM 2243")) + geom_line() + geom_errorbar(width = 0, aes(ymin = V1-V2, ymax = V1+V2)) + geom_point() + scale_color_manual(values = ac_group_scale, labels = c("0 Ac", "1mM Ac", "1mM labeled Ac", "10mM Ac", "10mM labeled Ac"), name = "") + theme(legend.position = "right", strip.text = element_text(face = "bold"), strip.background = element_blank()) + xlab("Time (hr)") + ylab("Number of labeled\ncompounds detected\nby LC-MS/MS")
labeled_compound_plotggsave(labeled_compound_plot, file = paste0(outdir, "sil_2243_labeled_comp_plot.pdf"), width = 5.5, height = 2.35)
############### Growth plot
od_data = data.table(read_xlsx(paste0(datadir, "ODdata.xlsx")))
od_data[,TP9_64:=round(as.numeric(TP9_64), digits = 3)] #fix weirdness
od_data = melt(od_data, id.var = c("Strain", "AcConc", "Replicate"), variable.name = "TimePoint")
setnames(od_data, "AcConc", "AcetateGroup")
od_data[,AcConc:=gsub("L", "", AcetateGroup)]
od_data[,Labeled:=ifelse(grepl("L", AcetateGroup), "SIL", "Unlabeled")]
setnames(od_data, "Strain", "StrainFull")
od_data[,Strain:=ifelse(StrainFull=="Valencia", "Val", StrainFull)]
od_data[,Strain:=ifelse(Strain=="AB8n2", "AB8", Strain)]
od_data[,TimePoint:=gsub("_[0-9.]+$", "", TimePoint)]
od_data[,TimePoint:=paste0("TP", as.numeric(gsub("TP", "", TimePoint))-1)]
od_data[,Replicate:=as.character(Replicate)]
setnames(od_data, "value", "OD600")
time_pt_tab <- data.table(TimePoint=paste0("TP", 0:8), Time = c(0, 15, 18.5, 23,28.5, 39, 43, 47.5, 64))
od_data <- merge(od_data, time_pt_tab, by = "TimePoint", all.x = T)
mean_od_data <- od_data[,list(mean(OD600, na.rm = T), sd(OD600, na.rm = T)/sqrt(length(OD600[!is.na(OD600)]))), by=list(Time, TimePoint, Strain, StrainFull, AcConc, AcetateGroup)]
od_plot <- ggplot(mean_od_data, aes(x = Time, y = V1, color = AcetateGroup)) + geom_line() + geom_point() +
facet_wrap(~StrainFull) +
geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2, width = 0)) + ylab("Cell density (OD600)") + xlab("Time (hr)") +
theme(legend.position = "right") + scale_color_manual(values = ac_group_scale, labels = c("0 acetate", "1mM acetate", "1mM labeled acetate", "10mM acetate", "10mM labeled acetate"), name = "")
ggsave(od_plot, file = paste0(outdir, "sil_growth.pdf"), width = 7.5, height = 2.4)
od_plot###### Targeted acetate Figure S5
rm(list = ls())
library(data.table)
library(tidyverse)
library(readxl)
library(ggrepel)
library(ggpubr)
library(emmeans)
library(RColorBrewer)
datadir <- "processedDatasets/"
outdir <- "figure2/"
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] emmeans_1.7.5 paletteer_1.4.0 santaR_1.2.3
## [4] lubridate_1.8.0 ggbeeswarm_0.6.0 growthcurver_0.3.1
## [7] KEGGREST_1.32.0 patchwork_1.1.1 ggrepel_0.9.1
## [10] ComplexHeatmap_2.13.1 ggpubr_0.4.0 RColorBrewer_1.1-2
## [13] fastcluster_1.2.3 vegan_2.6-2 lattice_0.20-45
## [16] permute_0.9-5 webchem_1.1.2 forcats_0.5.1
## [19] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
## [22] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [25] tidyverse_1.3.1 readxl_1.3.1 cowplot_1.1.1
## [28] ggplot2_3.3.6 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 circlize_0.4.15 splines_4.1.1
## [4] GenomeInfoDb_1.30.1 digest_0.6.29 foreach_1.5.2
## [7] htmltools_0.5.2 magick_2.7.3 fansi_1.0.3
## [10] magrittr_2.0.3 cluster_2.1.3 doParallel_1.0.17
## [13] tzdb_0.3.0 Biostrings_2.62.0 modelr_0.1.8
## [16] matrixStats_0.62.0 colorspace_2.0-3 rvest_1.0.2
## [19] haven_2.5.0 xfun_0.31 crayon_1.4.1
## [22] RCurl_1.98-1.7 prismatic_1.1.0 jsonlite_1.8.0
## [25] iterators_1.0.13 glue_1.6.2 gtable_0.3.0
## [28] zlibbioc_1.40.0 XVector_0.34.0 GetoptLong_1.0.5
## [31] car_3.0-13 shape_1.4.6 BiocGenerics_0.40.0
## [34] abind_1.4-5 scales_1.1.1 mvtnorm_1.1-3
## [37] DBI_1.1.1 data.tree_1.0.0 rstatix_0.7.0
## [40] Rcpp_1.0.8.3 xtable_1.8-4 clue_0.3-61
## [43] stats4_4.1.1 httr_1.4.2 ellipsis_0.3.2
## [46] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.1
## [49] dbplyr_2.1.1 utf8_1.2.2 tidyselect_1.1.2
## [52] labeling_0.4.2 rlang_1.0.2 later_1.3.0
## [55] munsell_0.5.0 cellranger_1.1.0 tools_4.1.1
## [58] cli_3.3.0 generics_0.1.0 broom_0.8.0
## [61] evaluate_0.15 fastmap_1.1.0 yaml_2.3.5
## [64] rematch2_2.1.2 knitr_1.39 fs_1.5.0
## [67] nlme_3.1-159 mime_0.12 xml2_1.3.2
## [70] compiler_4.1.1 rstudioapi_0.13 beeswarm_0.4.0
## [73] png_0.1-7 ggsignif_0.6.3 reprex_2.0.1
## [76] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [79] Matrix_1.4-1 vctrs_0.4.1 pillar_1.7.0
## [82] lifecycle_1.0.0 jquerylib_0.1.4 GlobalOptions_0.1.2
## [85] estimability_1.4 bitops_1.0-7 httpuv_1.6.5
## [88] R6_2.5.1 promises_1.2.0.1 vipor_0.4.5
## [91] IRanges_2.28.0 codetools_0.2-18 MASS_7.3-57
## [94] assertthat_0.2.1 rjson_0.2.21 minpack.lm_1.2-1
## [97] withr_2.5.0 S4Vectors_0.32.4 GenomeInfoDbData_1.2.7
## [100] mgcv_1.8-40 parallel_4.1.1 hms_1.1.1
## [103] coda_0.19-4 rmarkdown_2.14 carData_3.0-5
## [106] Cairo_1.6-0 shiny_1.6.0
met_data <- fread(paste0(datadir, "targeted_met_data.csv"))
met_data[,Accuracy:=as.numeric(Accuracy)]
met_data[S_N=="∞", S_N:="Inf"]
met_data[,S_N:=as.numeric(S_N)]
met_data[,mean(S_N, na.rm = T), by=Compound]## Compound V1
## 1: 2-Me-Butyric acid 138544.389
## 2: Acetate 134242.122
## 3: Acetate-d4 Inf
## 4: Alanine Inf
## 5: Arg Inf
## 6: Aspartic acid Inf
## 7: Butanate Inf
## 8: Citrulline Inf
## 9: Glutamine Inf
## 10: Glycine Inf
## 11: IAA 10583.213
## 12: ILA 7267.553
## 13: Isobutanate 161849.719
## 14: Isocaproic acid 542197.851
## 15: Isovalerate 162773.168
## 16: Lactic acid Inf
## 17: Ornithine Inf
## 18: Propionate Inf
## 19: Succinate 137001.752
## 20: Trp 9378.787
## Questions
# How to interpret S_N
met_data[!is.na(Strain) & S_N < 10, table(Compound)]## Compound
## 2-Me-Butyric acid Acetate-d4 Alanine Aspartic acid
## 18 76 6 36
## Butanate Citrulline Glutamine Glycine
## 59 6 3 7
## IAA ILA Isobutanate Isocaproic acid
## 18 77 28 13
## Isovalerate Lactic acid Ornithine Propionate
## 22 14 31 33
## Succinate Trp
## 70 2
met_data[!is.na(Strain),length(unique(Sample))]## [1] 83
met_data[,LLOQ:=as.numeric(LLOQ)]
met_data[,ULOQ:=as.numeric(ULOQ)]
met_data[,BelowLLOQ:=ifelse(mM < LLOQ, 1, 0)]
met_data[,AboveULOQ:=ifelse(mM > ULOQ, 1, 0)]
met_data[,BadS_N:=ifelse(S_N < 10, 1, 0)]
met_data[BelowLLOQ==1 & S_N > 1e4]## Strain AcetateConc TimePoint Replicate well Compound MS_Sample
## 1: NA NA Glutamine
## 2: NA NA Glycine
## 3: NA NA Lactic acid
## 4: 2243 0 TP4 1 E2 Glutamine S.E2
## 5: 2243 0 TP4 2 E5 Aspartic acid S.E5
## 6: 2243 0 TP4 3 B3 Aspartic acid S.B3
## 7: 2243 0 TP7 1 E7 Aspartic acid S.E7
## 8: 2243 0 TP7 2 E9 Acetate-d4 S.E9
## 9: 2243 0 TP7 3 E3 Aspartic acid S.E3
## 10: 2243 10 TP4 3 G5 Aspartic acid S.G5
## 11: 2243 10 TP4 3 G5 Glutamine S.G5
## 12: 2243 10 TP7 1 C7 Aspartic acid S.C7
## 13: 2243 1 TP4 1 A2 Glutamine S.A2
## 14: 2243 1 TP7 2 A9 Alanine S.A9
## 15: 2243 10 TP8 2 C11 Aspartic acid S.C11
## 16: 2243 10 TP8 2 C11 Butanate S.C11
## 17: AB8 0 TP4 1 F2 Alanine S.F2
## 18: AB8 0 TP4 1 F2 Glutamine S.F2
## 19: AB8 0 TP4 2 F5 Alanine S.F5
## 20: AB8 0 TP4 3 C3 Aspartic acid S.C3
## 21: AB8 0 TP7 1 F7 Aspartic acid S.F7
## 22: AB8 0 TP7 2 F9 Aspartic acid S.F9
## 23: AB8 0 TP7 3 F3 Glutamine S.F3
## 24: AB8 10 TP4 1 D2 Aspartic acid S.D2
## 25: AB8 10 TP4 2 D5 Alanine S.D5
## 26: AB8 10 TP7 1 D7 Alanine S.D7
## 27: AB8 10 TP7 1 D7 Aspartic acid S.D7
## 28: AB8 10 TP7 2 D9 Aspartic acid S.D9
## 29: AB8 1 TP7 3 H9 Glutamine S.H9
## 30: AB8 1 TP8 1 B10 Arg S.B10
## 31: AB8 1 TP8 2 D10 Arg S.D10
## 32: AB8 1 TP8 2 D10 Aspartic acid S.D10
## 33: AB8 1 TP8 3 F10 Arg S.F10
## 34: AB8 1 TP8 3 F10 Aspartic acid S.F10
## 35: AB8 1 TP4 1 B2 Alanine S.B2
## 36: AB8 1 TP4 1 B2 Aspartic acid S.B2
## 37: AB8 1 TP4 1 B2 Glutamine S.B2
## 38: AB8 1 TP4 2 B5 Alanine S.B5
## 39: AB8 1 TP4 2 B5 Aspartic acid S.B5
## 40: AB8 1 TP4 2 B5 Propionate S.B5
## 41: AB8 1 TP4 3 H2 Alanine S.H2
## 42: AB8 1 TP4 3 H2 Lactic acid S.H2
## 43: AB8 1 TP7 1 B7 Alanine S.B7
## 44: AB8 1 TP7 1 B7 Glutamine S.B7
## 45: AB8 1 TP7 2 B9 Glutamine S.B9
## 46: AB8 10 TP8 1 B11 Alanine S.B11
## 47: AB8 10 TP8 3 F11 Alanine S.F11
## 48: AB8 10 TP8 3 F11 Arg S.F11
## 49: Val 0 TP4 2 F4 Aspartic acid S.F4
## 50: Val 0 TP4 2 F4 Glutamine S.F4
## 51: Val 0 TP4 3 A3 Aspartic acid S.A3
## 52: Val 0 TP4 3 A3 Glutamine S.A3
## 53: Val 0 TP7 1 F6 Alanine S.F6
## 54: Val 0 TP7 1 F6 Aspartic acid S.F6
## 55: Val 0 TP7 2 F8 Glutamine S.F8
## 56: Val 0 TP7 3 D3 Alanine S.D3
## 57: Val 0 TP7 3 D3 Aspartic acid S.D3
## 58: Val 0 TP7 3 D3 Lactic acid S.D3
## 59: Val 10 TP4 2 D4 Glutamine S.D4
## 60: Val 10 TP7 2 D8 Aspartic acid S.D8
## 61: Val 10 TP7 2 D8 Butanate S.D8
## 62: Val 1 TP7 3 G8 Aspartic acid S.G8
## 63: Val 1 TP4 1 B1 Glutamine S.B1
## 64: Val 1 TP4 2 B4 Aspartic acid S.B4
## 65: Val 1 TP4 3 G1 Acetate-d4 S.G1
## 66: Val 1 TP7 2 B8 Aspartic acid S.B8
## 67: c 0 TP4 1 E1 Lactic acid S.E1
## 68: c 0 TP4 2 E4 Aspartic acid S.E4
## 69: c 0 TP4 2 E4 Lactic acid S.E4
## 70: c 0 TP4 2 E4 Propionate S.E4
## 71: c 0 TP7 1 E6 Aspartic acid S.E6
## 72: c 0 TP7 2 E8 Aspartic acid S.E8
## 73: c 0 TP7 2 E8 Propionate S.E8
## 74: c 10 TP4 1 C1 Aspartic acid S.C1
## 75: c 10 TP4 1 C1 Lactic acid S.C1
## 76: c 1 TP8 2 H10 Aspartic acid S.H10
## 77: c 1 TP4 2 A4 Citrulline S.A4
## 78: c 10 TP8 2 H11 Aspartic acid S.H11
## 79: c 10 TP8 2 H11 Lactic acid S.H11
## Strain AcetateConc TimePoint Replicate well Compound MS_Sample
## Location Accuracy S_N iSTD_Area mM LLOQ ULOQ Row
## 1: Std_PBSSt_09 133.9407 Inf 13317.555 0.007848088 0.01276341 1.5
## 2: Std_PBSSt_06 127.1703 Inf 267555.886 0.059611080 0.07695009 1.5
## 3: Std_PBSSt_09 126.4290 Inf 2847892.614 0.014815901 0.02365441 3.0
## 4: P1-E02 NA Inf 13609.560 0.010380122 0.01276341 1.5 E
## 5: P1-E05 NA Inf 11519.064 0.000000000 0.10582321 1.5 E
## 6: P1-B03 NA Inf 10086.437 0.000000000 0.10582321 1.5 B
## 7: P1-E07 NA Inf 10216.210 0.000000000 0.10582321 1.5 E
## 8: P1-E09 NA Inf 3716180.478 0.020977126 0.23659610 30.0 E
## 9: P1-E03 NA Inf 12936.341 0.000000000 0.10582321 1.5 E
## 10: P1-G05 NA Inf 8455.305 0.000000000 0.10582321 1.5 G
## 11: P1-G05 NA Inf 13051.871 0.008988906 0.01276341 1.5 G
## 12: P1-C07 NA Inf 8940.437 0.000000000 0.10582321 1.5 C
## 13: P1-A02 NA Inf 13598.638 0.012228330 0.01276341 1.5 A
## 14: P1-A09 NA Inf 237354.466 0.034080913 0.04225643 1.5 A
## 15: P1-C11 NA Inf 11720.259 0.000000000 0.10582321 1.5 C
## 16: P1-C11 NA Inf 4139916.665 0.011578595 0.04182426 3.0 C
## 17: P1-F02 NA Inf 237172.273 0.000000000 0.04225643 1.5 F
## 18: P1-F02 NA Inf 13896.045 0.006668622 0.01276341 1.5 F
## 19: P1-F05 NA Inf 221704.533 0.000000000 0.04225643 1.5 F
## 20: P1-C03 NA Inf 11828.588 0.000000000 0.10582321 1.5 C
## 21: P1-F07 NA Inf 6116.839 0.000000000 0.10582321 1.5 F
## 22: P1-F09 NA Inf 9491.304 0.000000000 0.10582321 1.5 F
## 23: P1-F03 NA Inf 12522.156 0.008031104 0.01276341 1.5 F
## 24: P1-D02 NA Inf 12449.945 0.000000000 0.10582321 1.5 D
## 25: P1-D05 NA Inf 215118.742 0.000000000 0.04225643 1.5 D
## 26: P1-D07 NA Inf 238847.135 0.000000000 0.04225643 1.5 D
## 27: P1-D07 NA Inf 11397.935 0.000000000 0.10582321 1.5 D
## 28: P1-D09 NA Inf 9573.691 0.000000000 0.10582321 1.5 D
## 29: P1-C12 NA Inf 14281.698 0.012760079 0.01276341 1.5 H
## 30: P1-B10 NA Inf 13165.290 0.203157239 0.25452760 75.0 B
## 31: P1-D10 NA Inf 18900.441 0.218943341 0.25452760 75.0 D
## 32: P1-D10 NA Inf 10792.889 0.000000000 0.10582321 1.5 D
## 33: P1-F10 NA Inf 20038.595 0.209094601 0.25452760 75.0 F
## 34: P1-F10 NA Inf 10965.632 0.000000000 0.10582321 1.5 F
## 35: P1-B02 NA Inf 267477.513 0.000000000 0.04225643 1.5 B
## 36: P1-B02 NA Inf 10237.513 0.000000000 0.10582321 1.5 B
## 37: P1-B02 NA Inf 17466.593 0.003637131 0.01276341 1.5 B
## 38: P1-B05 NA Inf 239083.336 0.000000000 0.04225643 1.5 B
## 39: P1-B05 NA Inf 10388.202 0.000000000 0.10582321 1.5 B
## 40: P1-B05 NA Inf 4233599.429 0.000000000 0.03765327 3.0 B
## 41: P1-G03 NA Inf 242124.194 0.000000000 0.04225643 1.5 H
## 42: P1-G03 NA Inf 4639231.400 0.021531672 0.02365441 3.0 H
## 43: P1-B07 NA Inf 226236.422 0.000000000 0.04225643 1.5 B
## 44: P1-B07 NA Inf 12395.605 0.011970025 0.01276341 1.5 B
## 45: P1-B09 NA Inf 14023.541 0.011520241 0.01276341 1.5 B
## 46: P1-B11 NA Inf 207443.130 0.000000000 0.04225643 1.5 B
## 47: P1-F11 NA Inf 212096.498 0.000000000 0.04225643 1.5 F
## 48: P1-F11 NA Inf 19479.220 0.243873203 0.25452760 75.0 F
## 49: P1-F04 NA Inf 7779.508 0.000000000 0.10582321 1.5 F
## 50: P1-F04 NA Inf 12807.308 0.004922372 0.01276341 1.5 F
## 51: P1-A03 NA Inf 9502.729 0.000000000 0.10582321 1.5 A
## 52: P1-A03 NA Inf 13742.967 0.004083988 0.01276341 1.5 A
## 53: P1-F06 NA Inf 220093.700 0.000000000 0.04225643 1.5 F
## 54: P1-F06 NA Inf 12259.053 0.000000000 0.10582321 1.5 F
## 55: P1-F08 NA Inf 15491.828 0.011640307 0.01276341 1.5 F
## 56: P1-D03 NA Inf 234810.563 0.000000000 0.04225643 1.5 D
## 57: P1-D03 NA Inf 10652.036 0.000000000 0.10582321 1.5 D
## 58: P1-D03 NA Inf 4145786.201 0.020031942 0.02365441 3.0 D
## 59: P1-D04 NA Inf 13036.605 0.006170081 0.01276341 1.5 D
## 60: P1-D08 NA Inf 9566.066 0.000000000 0.10582321 1.5 D
## 61: P1-D08 NA Inf 3936465.219 0.007829900 0.04182426 3.0 D
## 62: P1-G08 NA Inf 8713.082 0.000000000 0.10582321 1.5 G
## 63: P1-B01 NA Inf 14346.205 0.006313181 0.01276341 1.5 B
## 64: P1-B04 NA Inf 12014.545 0.000000000 0.10582321 1.5 B
## 65: P1-G01 NA Inf 4556438.427 0.022587655 0.23659610 30.0 G
## 66: P1-B08 NA Inf 9955.895 0.000000000 0.10582321 1.5 B
## 67: P1-E01 NA Inf 4379147.683 0.003800583 0.02365441 3.0 E
## 68: P1-E04 NA Inf 8792.582 0.000000000 0.10582321 1.5 E
## 69: P1-E04 NA Inf 4375688.576 0.007344757 0.02365441 3.0 E
## 70: P1-E04 NA 10409.7 4375688.576 0.000000000 0.03765327 3.0 E
## 71: P1-E06 NA Inf 11833.863 0.000000000 0.10582321 1.5 E
## 72: P1-E08 NA Inf 9980.063 0.000000000 0.10582321 1.5 E
## 73: P1-E08 NA Inf 4225784.075 0.000000000 0.03765327 3.0 E
## 74: P1-C01 NA Inf 12865.803 0.000000000 0.10582321 1.5 C
## 75: P1-C01 NA Inf 4509021.797 0.005571540 0.02365441 3.0 C
## 76: P1-D12 NA Inf 11855.931 0.000000000 0.10582321 1.5 H
## 77: P1-A04 NA Inf 15242.210 0.000000000 0.02753048 30.0 A
## 78: P1-E12 NA Inf 12602.132 0.000000000 0.10582321 1.5 H
## 79: P1-E12 NA Inf 4257567.360 0.011125312 0.02365441 3.0 H
## Location Accuracy S_N iSTD_Area mM LLOQ ULOQ Row
## Column Sample OD600 Time BelowLLOQ AboveULOQ BadS_N
## 1: NA NA NA 1 0 0
## 2: NA NA NA 1 0 0
## 3: NA NA NA 1 0 0
## 4: 2 2243_0_TP4_1 0.162 28.5 1 0 0
## 5: 5 2243_0_TP4_2 0.167 28.5 1 0 0
## 6: 3 2243_0_TP4_3 0.142 28.5 1 0 0
## 7: 7 2243_0_TP7_1 0.396 47.5 1 0 0
## 8: 9 2243_0_TP7_2 0.395 47.5 1 0 0
## 9: 3 2243_0_TP7_3 0.299 47.5 1 0 0
## 10: 5 2243_1_TP4_3 0.212 28.5 1 0 0
## 11: 5 2243_1_TP4_3 0.212 28.5 1 0 0
## 12: 7 2243_1_TP7_1 0.583 47.5 1 0 0
## 13: 2 2243_10_TP4_1 0.174 28.5 1 0 0
## 14: 9 2243_10_TP7_2 0.733 47.5 1 0 0
## 15: 11 2243_10_TP8_2 0.907 64.0 1 0 0
## 16: 11 2243_10_TP8_2 0.907 64.0 1 0 0
## 17: 2 AB8_0_TP4_1 0.201 28.5 1 0 0
## 18: 2 AB8_0_TP4_1 0.201 28.5 1 0 0
## 19: 5 AB8_0_TP4_2 0.168 28.5 1 0 0
## 20: 3 AB8_0_TP4_3 0.150 28.5 1 0 0
## 21: 7 AB8_0_TP7_1 0.424 47.5 1 0 0
## 22: 9 AB8_0_TP7_2 0.324 47.5 1 0 0
## 23: 3 AB8_0_TP7_3 0.296 47.5 1 0 0
## 24: 2 AB8_1_TP4_1 0.351 28.5 1 0 0
## 25: 5 AB8_1_TP4_2 0.296 28.5 1 0 0
## 26: 7 AB8_1_TP7_1 0.992 47.5 1 0 0
## 27: 7 AB8_1_TP7_1 0.992 47.5 1 0 0
## 28: 9 AB8_1_TP7_2 0.876 47.5 1 0 0
## 29: 9 AB8_1_TP7_3 0.885 47.5 1 0 0
## 30: 10 AB8_1_TP8_1 0.947 64.0 1 0 0
## 31: 10 AB8_1_TP8_2 0.901 64.0 1 0 0
## 32: 10 AB8_1_TP8_2 0.901 64.0 1 0 0
## 33: 10 AB8_1_TP8_3 0.904 64.0 1 0 0
## 34: 10 AB8_1_TP8_3 0.904 64.0 1 0 0
## 35: 2 AB8_10_TP4_1 0.313 28.5 1 0 0
## 36: 2 AB8_10_TP4_1 0.313 28.5 1 0 0
## 37: 2 AB8_10_TP4_1 0.313 28.5 1 0 0
## 38: 5 AB8_10_TP4_2 0.288 28.5 1 0 0
## 39: 5 AB8_10_TP4_2 0.288 28.5 1 0 0
## 40: 5 AB8_10_TP4_2 0.288 28.5 1 0 0
## 41: 2 AB8_10_TP4_3 0.289 28.5 1 0 0
## 42: 2 AB8_10_TP4_3 0.289 28.5 1 0 0
## 43: 7 AB8_10_TP7_1 0.918 47.5 1 0 0
## 44: 7 AB8_10_TP7_1 0.918 47.5 1 0 0
## 45: 9 AB8_10_TP7_2 0.824 47.5 1 0 0
## 46: 11 AB8_10_TP8_1 0.913 64.0 1 0 0
## 47: 11 AB8_10_TP8_3 0.878 64.0 1 0 0
## 48: 11 AB8_10_TP8_3 0.878 64.0 1 0 0
## 49: 4 Val_0_TP4_2 0.130 28.5 1 0 0
## 50: 4 Val_0_TP4_2 0.130 28.5 1 0 0
## 51: 3 Val_0_TP4_3 0.096 28.5 1 0 0
## 52: 3 Val_0_TP4_3 0.096 28.5 1 0 0
## 53: 6 Val_0_TP7_1 0.249 47.5 1 0 0
## 54: 6 Val_0_TP7_1 0.249 47.5 1 0 0
## 55: 8 Val_0_TP7_2 0.260 47.5 1 0 0
## 56: 3 Val_0_TP7_3 0.176 47.5 1 0 0
## 57: 3 Val_0_TP7_3 0.176 47.5 1 0 0
## 58: 3 Val_0_TP7_3 0.176 47.5 1 0 0
## 59: 4 Val_1_TP4_2 0.179 28.5 1 0 0
## 60: 8 Val_1_TP7_2 0.689 47.5 1 0 0
## 61: 8 Val_1_TP7_2 0.689 47.5 1 0 0
## 62: 8 Val_1_TP7_3 0.778 47.5 1 0 0
## 63: 1 Val_10_TP4_1 0.174 28.5 1 0 0
## 64: 4 Val_10_TP4_2 0.193 28.5 1 0 0
## 65: 1 Val_10_TP4_3 0.204 28.5 1 0 0
## 66: 8 Val_10_TP7_2 0.765 47.5 1 0 0
## 67: 1 c_0_TP4_1 NA NA 1 0 0
## 68: 4 c_0_TP4_2 NA NA 1 0 0
## 69: 4 c_0_TP4_2 NA NA 1 0 0
## 70: 4 c_0_TP4_2 NA NA 1 0 0
## 71: 6 c_0_TP7_1 NA NA 1 0 0
## 72: 8 c_0_TP7_2 NA NA 1 0 0
## 73: 8 c_0_TP7_2 NA NA 1 0 0
## 74: 1 c_1_TP4_1 NA NA 1 0 0
## 75: 1 c_1_TP4_1 NA NA 1 0 0
## 76: 10 c_1_TP8_2 NA NA 1 0 0
## 77: 4 c_10_TP4_2 NA NA 1 0 0
## 78: 11 c_10_TP8_2 NA NA 1 0 0
## 79: 11 c_10_TP8_2 NA NA 1 0 0
## Column Sample OD600 Time BelowLLOQ AboveULOQ BadS_N
met_data[,PresentButUnreliable:=ifelse(BelowLLOQ==1|BadS_N, 1, 0)]
met_data[TimePoint=="TP4", Time:=28.5]
met_data[TimePoint=="TP7", Time:=47.5]
met_data[TimePoint=="TP8", Time:=64]
met_data[,Time:=as.numeric(Time)]
met_data[,Strain:=factor(Strain, levels = c("c", "2243", "AB8", "Val"))]
levels(met_data$Strain) = c("Ctrl", "2243", "AB8n2", "Valencia")
met_data[,Acetate2:=factor(AcetateConc)]
levels(met_data$Acetate2) <- c("0mM Ac", "1mM Ac", "10mM Ac")
acetate_plot <- ggplot(met_data[Compound == "Acetate" & !is.na(Time)], aes(x = Time, y = mM-1.05, color = Strain)) + geom_point() + geom_line(aes(group = paste0(Strain, Replicate))) + facet_wrap(~HypAcetateConc, nrow = 1, scales = "free_y") + cowplot::theme_cowplot() + ylab("Concentration (mM)") + scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)")
### Stats
acetate_data_sub <- met_data[Compound == "Acetate" & !is.na(Time)]
acetate_data_sub[,mMCorrected:=mM - 1.05]
acetate_data_sub[,Condition:=paste0(as.character(Acetate2), "_", Time)]
#### Separate by acetate group since the SE is so different between them
mod0ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "0mM Ac"])
diffs0ac <- emmeans(mod0ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs0ac[,Acetate2:="0mM Ac"]
mod1ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "1mM Ac"])
diffs1ac <- emmeans(mod1ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs1ac[,Acetate2:="1mM Ac"]
mod10ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "10mM Ac"])
diffs10ac <- emmeans(mod10ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs10ac[,Acetate2:="10mM Ac"]
diffs_all <- rbind(diffs0ac, diffs1ac, diffs10ac)
diffs_all[,Strain:=gsub(" - Ctrl", "", contrast)]
diffs_all[,Signif:=case_when(
p.value < 0.1 & p.value > 0.05 ~ ".",
p.value < 0.05 & p.value > 0.01 ~ "*",
p.value < 0.01 & p.value > 0.001 ~ "**",
p.value < 0.001 ~ "***",
TRUE ~ ""
)]
diffs_all[,ylev:=case_when(
Acetate2 == "0mM Ac" ~ 0.5,
Acetate2 == "1mM Ac" ~ 1.85,
Acetate2 == "10mM Ac" ~ 18.5
)]
diffs_all[,ylev:=case_when(
Strain == "2243" & Acetate2 == "1mM Ac" ~ ylev + 0.1,
Strain == "Valencia" & Acetate2 == "1mM Ac" ~ ylev-0.1,
TRUE ~ ylev
)]
diffs_all[,Acetate2:=factor(Acetate2, levels = c("0mM Ac", "1mM Ac", "10mM Ac"))]
ac_loq <- met_data[Compound=="Acetate", unique(LLOQ)]
## Plot data
acetate_means <- met_data[Compound == "Acetate" & !is.na(Time), list(mean(mM-1.05), sd(mM-1.05)/sqrt(length(mM))), by = list(Time, Strain, Acetate2)]
acetate_plot <- ggplot(acetate_means, aes(x = Time, y = V1, color = Strain)) +
geom_hline(yintercept = ac_loq, linetype = 2) +
geom_line(aes(group = Strain)) + geom_point() +
geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0)+
geom_text(data = diffs_all, aes(y = ylev, x = Time, label = Signif)) + scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) +
theme(strip.background = element_blank()) + facet_wrap(~Acetate2, nrow = 1, scales = "free_y") +
ylab("Concentration (mM)") + scale_x_continuous(expand = expansion(add = c(4, 4))) +
scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)")
ggsave(acetate_plot, file = paste0(outdir, "targeted_acetate_quant.pdf"), width = 6, height = 2)
acetate_plot#### WE will separate by acetate group since the SE is so different between them
arg_data_sub <- met_data[Compound == "Arg" & !is.na(Time)]
mod0ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "0mM Ac"])
diffs0ac <- emmeans(mod0ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs0ac[,Acetate2:="0mM Ac"]
mod1ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "1mM Ac"])
diffs1ac <- emmeans(mod1ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs1ac[,Acetate2:="1mM Ac"]
mod10ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "10mM Ac"])
diffs10ac <- emmeans(mod10ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs10ac[,Acetate2:="10mM Ac"]
diffs_all <- rbind(diffs0ac, diffs1ac, diffs10ac)
diffs_all[,Strain:=gsub(" - Ctrl", "", contrast)]
diffs_all[,Signif:=case_when(
p.value < 0.1 & p.value > 0.05 ~ ".",
p.value < 0.05 & p.value > 0.01 ~ "*",
p.value < 0.01 & p.value > 0.001 ~ "**",
p.value < 0.001 ~ "***",
TRUE ~ ""
)]
diffs_all[,ylev:=case_when(
Acetate2 == "0mM Ac" ~ 72,
Acetate2 == "1mM Ac" ~ 72,
Acetate2 == "10mM Ac" ~ 72
)]
diffs_all[,ylev:=case_when(
Strain == "2243" & Acetate2 == "1mM Ac" ~ ylev + 0.1,
Strain == "Valencia" & Acetate2 == "1mM Ac" ~ ylev-0.1,
TRUE ~ ylev
)]
diffs_all[,Acetate2:=factor(Acetate2, levels = c("0mM Ac", "1mM Ac", "10mM Ac"))]
arg_means <- met_data[Compound == "Arg" & !is.na(Time), list(mean(mM), sd(mM)/sqrt(length(mM))), by = list(Time, Strain, Acetate2)]
arg_plot <- ggplot(arg_means, aes(x = Time, y = V1, color = Strain)) + geom_hline(yintercept = 0, linetype = 2) +
geom_line(aes(group = Strain)) + geom_point() +
geom_text(data = diffs_all, aes(y = ylev, x = Time, label = Signif)) + scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) +
geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0)+
scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) +
theme(strip.background = element_blank()) + facet_wrap(~Acetate2, nrow = 1) +
ylab("Concentration of arginine (mM)") + scale_x_continuous(expand = expansion(add = c(4, 4))) +
scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)") ## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
ggsave(arg_plot, file = paste0(outdir, "targeted_arg_plot.pdf"), width = 6, height = 2)
arg_plotrm(list = ls())
library(data.table)
library(tidyverse)
library(ggpubr)
outdir <- "figure_s6/"
dir.create(outdir)
theme_set(theme_classic2())
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] emmeans_1.7.5 paletteer_1.4.0 santaR_1.2.3
## [4] lubridate_1.8.0 ggbeeswarm_0.6.0 growthcurver_0.3.1
## [7] KEGGREST_1.32.0 patchwork_1.1.1 ggrepel_0.9.1
## [10] ComplexHeatmap_2.13.1 ggpubr_0.4.0 RColorBrewer_1.1-2
## [13] fastcluster_1.2.3 vegan_2.6-2 lattice_0.20-45
## [16] permute_0.9-5 webchem_1.1.2 forcats_0.5.1
## [19] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
## [22] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [25] tidyverse_1.3.1 readxl_1.3.1 cowplot_1.1.1
## [28] ggplot2_3.3.6 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 circlize_0.4.15 splines_4.1.1
## [4] GenomeInfoDb_1.30.1 digest_0.6.29 foreach_1.5.2
## [7] htmltools_0.5.2 magick_2.7.3 fansi_1.0.3
## [10] magrittr_2.0.3 cluster_2.1.3 doParallel_1.0.17
## [13] tzdb_0.3.0 Biostrings_2.62.0 modelr_0.1.8
## [16] matrixStats_0.62.0 colorspace_2.0-3 rvest_1.0.2
## [19] haven_2.5.0 xfun_0.31 crayon_1.4.1
## [22] RCurl_1.98-1.7 prismatic_1.1.0 jsonlite_1.8.0
## [25] iterators_1.0.13 glue_1.6.2 gtable_0.3.0
## [28] zlibbioc_1.40.0 XVector_0.34.0 GetoptLong_1.0.5
## [31] car_3.0-13 shape_1.4.6 BiocGenerics_0.40.0
## [34] abind_1.4-5 scales_1.1.1 mvtnorm_1.1-3
## [37] DBI_1.1.1 data.tree_1.0.0 rstatix_0.7.0
## [40] Rcpp_1.0.8.3 xtable_1.8-4 clue_0.3-61
## [43] stats4_4.1.1 httr_1.4.2 ellipsis_0.3.2
## [46] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.1
## [49] dbplyr_2.1.1 utf8_1.2.2 tidyselect_1.1.2
## [52] labeling_0.4.2 rlang_1.0.2 later_1.3.0
## [55] munsell_0.5.0 cellranger_1.1.0 tools_4.1.1
## [58] cli_3.3.0 generics_0.1.0 broom_0.8.0
## [61] evaluate_0.15 fastmap_1.1.0 yaml_2.3.5
## [64] rematch2_2.1.2 knitr_1.39 fs_1.5.0
## [67] nlme_3.1-159 mime_0.12 xml2_1.3.2
## [70] compiler_4.1.1 rstudioapi_0.13 beeswarm_0.4.0
## [73] png_0.1-7 ggsignif_0.6.3 reprex_2.0.1
## [76] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [79] Matrix_1.4-1 vctrs_0.4.1 pillar_1.7.0
## [82] lifecycle_1.0.0 jquerylib_0.1.4 GlobalOptions_0.1.2
## [85] estimability_1.4 bitops_1.0-7 httpuv_1.6.5
## [88] R6_2.5.1 promises_1.2.0.1 vipor_0.4.5
## [91] IRanges_2.28.0 codetools_0.2-18 MASS_7.3-57
## [94] assertthat_0.2.1 rjson_0.2.21 minpack.lm_1.2-1
## [97] withr_2.5.0 S4Vectors_0.32.4 GenomeInfoDbData_1.2.7
## [100] mgcv_1.8-40 parallel_4.1.1 hms_1.1.1
## [103] coda_0.19-4 rmarkdown_2.14 carData_3.0-5
## [106] Cairo_1.6-0 shiny_1.6.0
datadir <- "../SILArginine2022-03/"
hilic_intra <- readRDS(paste0(datadir, "SIL_Intra_HILIC_LowLevelDataAllProcessed.rds"))
labeled_comps <- hilic_intra[,sum(AreaFrac), by=list(CompID, NumLabeledC)][NumLabeledC > 0 & V1 > 1e5, unique(CompID)]
frac_labeling <- hilic_intra[ArgGroup=="HR" & Strain != "c",list(sum(AreaFrac[NumLabeledC==0]), sum(AreaFrac[NumLabeledC != 0])), by=CompID]
frac_labeling[,LabelFrac:=V2/(V2+V1)]
median_dat <- hilic_intra[,list(median(AreaFrac), mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by = list(CompID, FeatID, Name, Formula, MSI, NumLabeledC, ArgGroup, Strain, TimePoint, Time)]
most_abund_iso <- median_dat[ArgGroup == "HR" & Strain == "2243",list(NumLabeledC[which.max(V2)], max(V2)), by = list(CompID, Formula)]
most_abund_iso[order(V1, decreasing = T)][1:20]## CompID Formula V1
## 1: 348.22374_9.438_Unknown_Unknown_Pos C12 H28 N8 O4 12
## 2: 372.17373_8.808_Unknown_Unknown_Pos C13 H28 N2 O10 12
## 3: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos C12 H26 N6 O6 12
## 4: 404.11103_8.8_Unknown_Unknown_Pos C12 H26 N2 O9 P2 11
## 5: 264.18017_9.581_Unknown_Unknown_Pos C10 H24 N4 O4 10
## 6: 217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos C7 H15 N5 O3 7
## 7: 216.12228_8.032_Unknown_Unknown_Pos C8 H16 N4 O3 6
## 8: 202.10682_8.214_Unknown_Unknown_Pos C7 H14 N4 O3 6
## 9: 174.11167_9.894_Unknown_Unknown_Pos C6 H14 N4 O2 6
## 10: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg C6 H14 N4 O2 6
## 11: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos C6 H14 N4 O2 6
## 12: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos C6 H10 N2 O3 6
## 13: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos C6 H13 N3 O3 6
## 14: 111.07977_5.659_Labeled Unknown_Labeled Unknown_Pos C5 H9 N3 5
## 15: 129.09041_9.44_Unknown_Unknown_Pos C5 H11 N3 O 5
## 16: 132.08991_9.439_Unknown_Unknown_Neg C5 H12 N2 O2 5
## 17: 173.11665_9.234_Unknown_Unknown_Pos C7 H15 N3 O2 5
## 18: 174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos C7 H14 N2 O3 5
## 19: 129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos C5 H11 N3 O 5
## 20: 216.14758_8.58_Unknown_Unknown_Pos C10 H20 N2 O3 5
## V2
## 1: 32671283.1
## 2: 7334523.1
## 3: 74638415.9
## 4: 973554.1
## 5: 13346435.7
## 6: 16719255.0
## 7: 1611326.5
## 8: 2620802.6
## 9: 15436774.7
## 10: 169671769.7
## 11: 2433432201.2
## 12: 603774145.5
## 13: 1814734154.5
## 14: 6258371.8
## 15: 8208446.8
## 16: 10903381.0
## 17: 2186815.1
## 18: 2719810.6
## 19: 61488051.2
## 20: 5357241.5
most_abund_iso[V1 != 0 & grepl("Pos", CompID)][order(V2, decreasing = T)][1:30]## CompID Formula V1
## 1: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos C6 H14 N4 O2 6
## 2: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos C6 H13 N3 O3 6
## 3: 132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos C5 H12 N2 O2 5
## 4: 115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos C5 H9 N O2 5
## 5: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos C6 H10 N2 O3 6
## 6: 114.07955_9.582_Prolinamide_Prolinamide_Pos C5 H10 N2 O 5
## 7: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos C12 H26 N6 O6 12
## 8: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos C7 H14 N2 O3 5
## 9: 129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos C5 H11 N3 O 5
## 10: 69.05802_9.583_Labeled Unknown_Labeled Unknown_Pos C4 H7 N 4
## 11: 132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos C5 H12 N2 O2 5
## 12: 156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos C7 H12 N2 O2 5
## 13: 114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos C5 H10 N2 O 5
## 14: 348.22374_9.438_Unknown_Unknown_Pos C12 H28 N8 O4 12
## 15: 112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos C5 H8 N2 O 5
## 16: 217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos C7 H15 N5 O3 7
## 17: 174.11167_9.894_Unknown_Unknown_Pos C6 H14 N4 O2 6
## 18: 69.05802_8.774_Unknown_Unknown_Pos C4 H7 N 4
## 19: 264.18017_9.581_Unknown_Unknown_Pos C10 H24 N4 O4 10
## 20: 115.0747_6.17_Labeled Unknown_Labeled Unknown_Pos C4 H9 N3 O 4
## 21: 236.1528_8.727_Unknown_Unknown_Pos C13 H20 N2 O2 5
## 22: 129.09041_9.44_Unknown_Unknown_Pos C5 H11 N3 O 5
## 23: 132.09003_8.769_Unknown_Unknown_Pos C5 H12 N2 O2 5
## 24: 372.17373_8.808_Unknown_Unknown_Pos C13 H28 N2 O10 12
## 25: 113.04785_8.765_Unknown_Unknown_Pos C5 H7 N O2 5
## 26: 111.07977_5.659_Labeled Unknown_Labeled Unknown_Pos C5 H9 N3 5
## 27: 69.05801_5.659_Unknown_Unknown_Pos C4 H7 N 4
## 28: 216.14758_8.58_Unknown_Unknown_Pos C10 H20 N2 O3 5
## 29: 96.0689_9.583_Unknown_Unknown_Pos C5 H8 N2 5
## 30: 85.05285_4.196_Unknown_Unknown_Pos C4 H7 N O 4
## CompID Formula V1
## V2
## 1: 2433432201
## 2: 1814734155
## 3: 1516007668
## 4: 956660206
## 5: 603774146
## 6: 308427577
## 7: 74638416
## 8: 65372136
## 9: 61488051
## 10: 48550460
## 11: 46276921
## 12: 44160266
## 13: 34578251
## 14: 32671283
## 15: 30532688
## 16: 16719255
## 17: 15436775
## 18: 14340714
## 19: 13346436
## 20: 11673893
## 21: 9470794
## 22: 8208447
## 23: 7359230
## 24: 7334523
## 25: 6346959
## 26: 6258372
## 27: 5999499
## 28: 5357242
## 29: 4691248
## 30: 3524013
## V2
most_abund_iso[V1 != 0 & grepl("Neg", CompID)][order(V2, decreasing = T)][1:20]## CompID
## 1: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg
## 2: 132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
## 3: 132.08991_9.579_Ornithine_Ornithine_Neg
## 4: 132.08991_9.439_Unknown_Unknown_Neg
## 5: 607.08266_9.886_UDP-N-acetylglucosamine_UDP-N-acetylglucosamine_Neg
## 6: <NA>
## 7: <NA>
## 8: <NA>
## 9: <NA>
## 10: <NA>
## 11: <NA>
## 12: <NA>
## 13: <NA>
## 14: <NA>
## 15: <NA>
## 16: <NA>
## 17: <NA>
## 18: <NA>
## 19: <NA>
## 20: <NA>
## Formula V1 V2
## 1: C6 H14 N4 O2 6 169671769.7
## 2: C5 H12 N2 O2 5 62003057.5
## 3: C5 H12 N2 O2 5 20662039.7
## 4: C5 H12 N2 O2 5 10903381.0
## 5: C17 H27 N3 O17 P2 1 222146.8
## 6: <NA> NA NA
## 7: <NA> NA NA
## 8: <NA> NA NA
## 9: <NA> NA NA
## 10: <NA> NA NA
## 11: <NA> NA NA
## 12: <NA> NA NA
## 13: <NA> NA NA
## 14: <NA> NA NA
## 15: <NA> NA NA
## 16: <NA> NA NA
## 17: <NA> NA NA
## 18: <NA> NA NA
## 19: <NA> NA NA
## 20: <NA> NA NA
median_dat[NumLabeledC > 4 & ArgGroup == "HR" & Strain == "2243" & V2 > 1e7][order(V2, decreasing = T)]## CompID
## 1: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 2: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 3: 132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos
## 4: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 5: 132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos
## 6: 115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos
## 7: 115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos
## 8: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 9: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 10: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 11: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 12: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 13: 114.07955_9.582_Prolinamide_Prolinamide_Pos
## 14: 115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos
## 15: 114.07955_9.582_Prolinamide_Prolinamide_Pos
## 16: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 17: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg
## 18: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 19: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 20: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 21: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos
## 22: 132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
## 23: 129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos
## 24: 132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
## 25: 132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos
## 26: 156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos
## 27: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 28: 156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos
## 29: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 30: 114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos
## 31: 348.22374_9.438_Unknown_Unknown_Pos
## 32: 112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos
## 33: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 34: 114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos
## 35: 132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos
## 36: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg
## 37: 132.08991_9.579_Ornithine_Ornithine_Neg
## 38: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 39: 112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos
## 40: 217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos
## 41: 174.11167_9.894_Unknown_Unknown_Pos
## 42: 264.18017_9.581_Unknown_Unknown_Pos
## 43: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 44: 132.08991_9.439_Unknown_Unknown_Neg
## CompID
## FeatID
## 1: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate6
## 2: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate6
## 3: 132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos_ExRate5
## 4: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate6
## 5: 132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos_ExRate5
## 6: 115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 7: 115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 8: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate6
## 9: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate5
## 10: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate6
## 11: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate6
## 12: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate5
## 13: 114.07955_9.582_Prolinamide_Prolinamide_Pos_ExRate5
## 14: 115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos_ExRate5
## 15: 114.07955_9.582_Prolinamide_Prolinamide_Pos_ExRate5
## 16: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 17: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg_ExRate6
## 18: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate5
## 19: 158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 20: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate12
## 21: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos_ExRate5
## 22: 132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg_ExRate5
## 23: 129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 24: 132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg_ExRate5
## 25: 132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 26: 156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 27: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate11
## 28: 156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 29: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate12
## 30: 114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 31: 348.22374_9.438_Unknown_Unknown_Pos_ExRate12
## 32: 112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 33: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate11
## 34: 114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 35: 132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 36: 174.11163_9.439_DL-Arginine_DL-Arginine_Neg_ExRate6
## 37: 132.08991_9.579_Ornithine_Ornithine_Neg_ExRate5
## 38: 174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate5
## 39: 112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 40: 217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos_ExRate7
## 41: 174.11167_9.894_Unknown_Unknown_Pos_ExRate6
## 42: 264.18017_9.581_Unknown_Unknown_Pos_ExRate10
## 43: 350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate10
## 44: 132.08991_9.439_Unknown_Unknown_Neg_ExRate5
## FeatID
## Name Formula MSI NumLabeledC ArgGroup Strain
## 1: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 6 HR 2243
## 2: L-(+)-Citrulline C6 H13 N3 O3 MSI_1;Labeled 6 HR 2243
## 3: L(+)-Ornithine C5 H12 N2 O2 MSI_1;Labeled 5 HR 2243
## 4: L-(+)-Citrulline C6 H13 N3 O3 MSI_1;Labeled 6 HR 2243
## 5: L(+)-Ornithine C5 H12 N2 O2 MSI_1;Labeled 5 HR 2243
## 6: Labeled Unknown C5 H9 N O2 Labeled 5 HR 2243
## 7: Labeled Unknown C5 H9 N O2 Labeled 5 HR 2243
## 8: Labeled Unknown C6 H10 N2 O3 Labeled 6 HR 2243
## 9: L-(+)-Citrulline C6 H13 N3 O3 MSI_1;Labeled 5 HR 2243
## 10: Labeled Unknown C6 H10 N2 O3 Labeled 6 HR 2243
## 11: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 6 HR 2243
## 12: L-(+)-Citrulline C6 H13 N3 O3 MSI_1;Labeled 5 HR 2243
## 13: Prolinamide C5 H10 N2 O MSI_3;Labeled 5 HR 2243
## 14: D-(+)-Proline C5 H9 N O2 MSI_1;Labeled 5 HR 2243
## 15: Prolinamide C5 H10 N2 O MSI_3;Labeled 5 HR 2243
## 16: Labeled Unknown C6 H10 N2 O3 Labeled 5 HR 2243
## 17: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 6 HR 2243
## 18: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 5 HR 2243
## 19: Labeled Unknown C6 H10 N2 O3 Labeled 5 HR 2243
## 20: Labeled Unknown C12 H26 N6 O6 Labeled 12 HR 2243
## 21: N-Acetylornithine C7 H14 N2 O3 MSI_3;Labeled 5 HR 2243
## 22: Labeled Unknown C5 H12 N2 O2 Labeled 5 HR 2243
## 23: Labeled Unknown C5 H11 N3 O Labeled 5 HR 2243
## 24: Labeled Unknown C5 H12 N2 O2 Labeled 5 HR 2243
## 25: Labeled Unknown C5 H12 N2 O2 Labeled 5 HR 2243
## 26: Labeled Unknown C7 H12 N2 O2 Labeled 5 HR 2243
## 27: Labeled Unknown C12 H26 N6 O6 Labeled 11 HR 2243
## 28: Labeled Unknown C7 H12 N2 O2 Labeled 5 HR 2243
## 29: Labeled Unknown C12 H26 N6 O6 Labeled 12 HR 2243
## 30: Labeled Unknown C5 H10 N2 O Labeled 5 HR 2243
## 31: Unknown C12 H28 N8 O4 UnknownNA 12 HR 2243
## 32: Labeled Unknown C5 H8 N2 O Labeled 5 HR 2243
## 33: Labeled Unknown C12 H26 N6 O6 Labeled 11 HR 2243
## 34: Labeled Unknown C5 H10 N2 O Labeled 5 HR 2243
## 35: Labeled Unknown C5 H12 N2 O2 Labeled 5 HR 2243
## 36: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 6 HR 2243
## 37: Ornithine C5 H12 N2 O2 MSI_1:Labeled 5 HR 2243
## 38: DL-Arginine C6 H14 N4 O2 MSI_1;Labeled 5 HR 2243
## 39: Labeled Unknown C5 H8 N2 O Labeled 5 HR 2243
## 40: Labeled Unknown C7 H15 N5 O3 Labeled 7 HR 2243
## 41: Unknown C6 H14 N4 O2 UnknownNA 6 HR 2243
## 42: Unknown C10 H24 N4 O4 UnknownNA 10 HR 2243
## 43: Labeled Unknown C12 H26 N6 O6 Labeled 10 HR 2243
## 44: Unknown C5 H12 N2 O2 UnknownNA 5 HR 2243
## Name Formula MSI NumLabeledC ArgGroup Strain
## TimePoint Time V1 V2 V3
## 1: TP3 44 2800397763 2433432201 382702639
## 2: TP3 44 1767954825 1814734155 184582720
## 3: TP4 56 1656425405 1516007668 179889527
## 4: TP4 56 1197196415 1186464150 236262499
## 5: TP3 44 1133172993 1133038728 15864483
## 6: TP4 56 1062857832 956660206 110799577
## 7: TP3 44 763543885 783875160 23366967
## 8: TP3 44 583347351 603774146 60067528
## 9: TP4 56 575143733 595977430 69713210
## 10: TP4 56 424226146 400286091 70028594
## 11: TP4 56 68687100 352221281 317903396
## 12: TP3 44 277247672 340676057 73422594
## 13: TP4 56 337860170 308427577 37518663
## 14: TP4 56 243836327 282383359 145379078
## 15: TP3 44 243401774 244825242 1666906
## 16: TP4 56 209903719 209629445 22842592
## 17: TP3 44 164824316 169671770 44107138
## 18: TP3 44 142547048 125117750 19568646
## 19: TP3 44 95848036 116459815 25916026
## 20: TP3 44 74413926 74638416 4029161
## 21: TP4 56 75060415 65372136 13501343
## 22: TP3 44 64425396 62003058 5398596
## 23: TP3 44 58308097 61488051 21332380
## 24: TP4 56 44955647 50784230 10983747
## 25: TP4 56 47806664 46276921 2859598
## 26: TP4 56 41921306 44160266 2747735
## 27: TP4 56 36649678 37475301 1421078
## 28: TP3 44 39130481 37117172 2365158
## 29: TP4 56 38225076 36117309 8146416
## 30: TP4 56 35579425 34578251 4065184
## 31: TP3 44 36248167 32671283 10732100
## 32: TP3 44 32405282 30532688 5361084
## 33: TP3 44 25407180 27119842 3566237
## 34: TP3 44 20843708 21717832 2105291
## 35: TP3 44 21409686 21704627 1990498
## 36: TP4 56 9414202 21368153 16469454
## 37: TP3 44 21716404 20662040 1872655
## 38: TP4 56 3104331 19765075 18234943
## 39: TP4 56 20374372 19551392 4619979
## 40: TP3 44 15444557 16719255 5788029
## 41: TP3 44 16553992 15436775 1197296
## 42: TP4 56 14672438 13346436 1569095
## 43: TP4 56 10501751 12445262 3346754
## 44: TP3 44 9098434 10903381 4081690
## TimePoint Time V1 V2 V3
top_labeled_comps <- frac_labeling[V2 > 1e5 & LabelFrac > 0.1, CompID]
top_labeled_comps2<- frac_labeling[V2 > 1e7 & V1+V2 > 5e7 & LabelFrac > 0.1, CompID]
## How many features have >5 labeled carbons?
unique(hilic_intra[NumLabeledC > 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR" & !grepl("rginine", Name) &
!grepl("itrulline", Name) & !grepl("rnithine", Name) & TimePoint=="TP4",
list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]## [1] "404.11103_8.8_Unknown_Unknown_Pos"
## [2] "135.10497_1.603_Unknown_Unknown_Pos"
## [3] "348.22374_9.438_Unknown_Unknown_Pos"
## [4] "171.12614_1.113_Unknown_Unknown_Pos"
## [5] "174.11167_9.894_Unknown_Unknown_Pos"
## [6] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"
## [7] "372.17373_8.808_Unknown_Unknown_Pos"
## [8] "264.18017_9.581_Unknown_Unknown_Pos"
## [9] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"
## [10] "155.06964_9.527_Unknown_Unknown_Pos"
## [11] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"
unique(hilic_intra[NumLabeledC >= 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR" & TimePoint=="TP4",
list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]## [1] "173.11665_9.234_Unknown_Unknown_Pos"
## [2] "174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos"
## [3] "129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos"
## [4] "404.11103_8.8_Unknown_Unknown_Pos"
## [5] "216.14758_8.58_Unknown_Unknown_Pos"
## [6] "135.10497_1.603_Unknown_Unknown_Pos"
## [7] "348.22374_9.438_Unknown_Unknown_Pos"
## [8] "129.09041_9.44_Unknown_Unknown_Pos"
## [9] "132.08991_9.439_Unknown_Unknown_Neg"
## [10] "171.12614_1.113_Unknown_Unknown_Pos"
## [11] "96.0689_9.583_Unknown_Unknown_Pos"
## [12] "174.11167_9.894_Unknown_Unknown_Pos"
## [13] "227.01498_7.695_Unknown_Unknown_Pos"
## [14] "113.04785_8.765_Unknown_Unknown_Pos"
## [15] "132.09003_8.769_Unknown_Unknown_Pos"
## [16] "236.1528_8.727_Unknown_Unknown_Pos"
## [17] "132.08991_9.579_Ornithine_Ornithine_Neg"
## [18] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"
## [19] "174.11163_9.439_DL-Arginine_DL-Arginine_Neg"
## [20] "372.17373_8.808_Unknown_Unknown_Pos"
## [21] "264.18017_9.581_Unknown_Unknown_Pos"
## [22] "174.11167_9.439_DL-Arginine_DL-Arginine_Pos"
## [23] "112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos"
## [24] "114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos"
## [25] "132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg"
## [26] "174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos"
## [27] "132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos"
## [28] "156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos"
## [29] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"
## [30] "155.06964_9.527_Unknown_Unknown_Pos"
## [31] "114.07955_9.582_Prolinamide_Prolinamide_Pos"
## [32] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"
## [33] "115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos"
## [34] "132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos"
## [35] "115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos"
## [36] "175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos"
bad_comps <- hilic_intra[NumLabeledC >= 5 & AreaFrac > 1e6& ArgGroup != "HR" & value > 5,
list(NumLabeledC, Name, Formula, CompID)][,unique(CompID)]
unique(hilic_intra[NumLabeledC >= 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR" & TimePoint=="TP4" & !CompID %in% bad_comps,
list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]## [1] "173.11665_9.234_Unknown_Unknown_Pos"
## [2] "174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos"
## [3] "129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos"
## [4] "404.11103_8.8_Unknown_Unknown_Pos"
## [5] "216.14758_8.58_Unknown_Unknown_Pos"
## [6] "348.22374_9.438_Unknown_Unknown_Pos"
## [7] "129.09041_9.44_Unknown_Unknown_Pos"
## [8] "132.08991_9.439_Unknown_Unknown_Neg"
## [9] "96.0689_9.583_Unknown_Unknown_Pos"
## [10] "174.11167_9.894_Unknown_Unknown_Pos"
## [11] "227.01498_7.695_Unknown_Unknown_Pos"
## [12] "113.04785_8.765_Unknown_Unknown_Pos"
## [13] "132.09003_8.769_Unknown_Unknown_Pos"
## [14] "236.1528_8.727_Unknown_Unknown_Pos"
## [15] "132.08991_9.579_Ornithine_Ornithine_Neg"
## [16] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"
## [17] "174.11163_9.439_DL-Arginine_DL-Arginine_Neg"
## [18] "372.17373_8.808_Unknown_Unknown_Pos"
## [19] "264.18017_9.581_Unknown_Unknown_Pos"
## [20] "174.11167_9.439_DL-Arginine_DL-Arginine_Pos"
## [21] "112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos"
## [22] "132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg"
## [23] "174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos"
## [24] "132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos"
## [25] "156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos"
## [26] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"
## [27] "155.06964_9.527_Unknown_Unknown_Pos"
## [28] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"
## [29] "115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos"
hilic_intra[!CompID %in% bad_comps, length(unique(CompID))]## [1] 234
feat_data_wide <- dcast(median_dat[ArgGroup == "HR"], FeatID+CompID+Formula+Name+NumLabeledC~Strain+TimePoint, value.var = "V1")
max_vals <- hilic_intra[,.(MaxVal = max(AreaFrac)), by = FeatID]
feat_data_wide <- merge(feat_data_wide, max_vals, by = "FeatID", all.x = T)
new_color_scale <- c("gray", brewer.pal(9, "YlGnBu"))
median_dat[,NumLabeledC_v2:=ifelse(NumLabeledC >= 9, "9 or more", NumLabeledC)]
tot_vals <- feat_data_wide[,list(sum(`2243_TP4`), sum(c_TP3)), by = CompID]
tot_vals[,log2FC:=log2((V2+1000)/(V1+1000))]
comp_order <- median_dat[V1 > 1e5 & Strain == "2243", list(max(NumLabeledC), V1[which.max(NumLabeledC)]), by = CompID][order(V1, V2, decreasing = T), CompID]
top_id_comps <- top_labeled_comps[!grepl("known", top_labeled_comps)]
top_id_comps <- sort(top_id_comps)[c(1:4,6, 7, 8, 9, 10, 11, 13, 14, 15:18)]
median_dat[,NumLabeledC2:=paste0("M+", NumLabeledC_v2)]
comp_order2 <- median_dat[V1 > 1e5 & Strain == "2243" & CompID %in% top_id_comps, list(max(NumLabeledC), V1[which.max(NumLabeledC)]), by = Name][order(V1, V2, decreasing = T), Name]
hilic_intra_labels <- ggplot(median_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% top_id_comps & !grepl("known", Name)],
aes(x = factor(Time), y = V2, fill = factor(NumLabeledC2))) + geom_bar(stat = "identity", position = "fill", color = "black") + facet_wrap(~factor(Name, levels = comp_order2), ncol = 4, scales = "free_y") + scale_fill_manual(values = new_color_scale, name = "Labeling Pattern") + theme(axis.text = element_text(size = 9), strip.text = element_text(size = 9)) + xlab("Time (hr)") + ylab("Share of Peak Area") + theme(strip.background = element_blank())
ggsave(hilic_intra_labels, file = paste0(outdir, "hilic_intra_arg_topIDcomps_sub.pdf"), width = 6, height = 4.5)
hilic_intra_labels############# Supp table
tab_save <- median_dat[(CompID %in% top_labeled_comps) & Strain != "c" & ArgGroup == "HR"][V1 != 0 & V2 != 0]
setnames(tab_save, c("V2", "V3"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, ArgGroup)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]
tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)] & Time == 56], CompID+Name+MSI+Formula+Strain ~ paste0("M+", NumLabeledC), value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[1])
})]
tab_save[,RT:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[2])
})]
tab_save[,MSI:=gsub("MSI_", "", str_extract(MSI, "MSI_[0-9]"))]
tab_save[,CompID:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(paste(foo[c(1,2,3,5)], collapse = "_"))
})]
tab_save <- tab_save[,c("CompID", "Name", "MSI", "MolWt", "Formula", "RT", sort(names(tab_save)[6:44])), with=F]
fwrite(tab_save[order(MolWt)], file = paste0(outdir, "intracellular_arg_labeling_summary.tsv"), sep = "\t")####################### Extracellular
rm(list = ls())
datadir <- "../SILArginine2022-03/"
outdir <- "figure_s6/"
hilic_extra <- readRDS(paste0(datadir, "SIL_Extra_HILIC_LowLevelDataAllProcessed.rds"))
hilic_extra[,MinFeatValue:=min(AreaFrac[AreaFrac != 0]), by = FeatID]
hilic_extra[,log10value:=log10(AreaFrac + 0.25*MinFeatValue)]
labeled_comps <- hilic_extra[,sum(AreaFrac), by=list(CompID, NumLabeledC)][NumLabeledC > 0 & V1 > 1e5, unique(CompID)]
frac_labeling <- hilic_extra[ArgGroup=="HR" & Strain != "c",list(sum(AreaFrac[NumLabeledC==0]), sum(AreaFrac[NumLabeledC != 0])), by=CompID]
frac_labeling[,LabelFrac:=V2/(V2+V1)]
top_labeled_comps <- frac_labeling[V2 > 1e5 & LabelFrac > 0.1, CompID]
top_labeled_comps2<- frac_labeling[V2 > 1e7 & V1+V2 > 5e7 & LabelFrac > 0.1, CompID]
top_labeled_comps3 <- frac_labeling[LabelFrac > 0.05, CompID]
median_dat <- hilic_extra[,median(AreaFrac), by = list(CompID, Formula, FeatID, NumLabeledC, Name, ArgGroup, Strain, TimePoint, Time)]
most_abund_iso <- median_dat[ArgGroup == "HR" & Strain == "2243",list(NumLabeledC[which.max(V1)], max(V1)), by = list(CompID, Formula)]
hilic_extra[,IonMode:=ifelse(grepl("Pos$", CompID), "Pos", "Neg")]
mean_dat <- hilic_extra[,list(mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac)), mean(value)),
by = list(CompID, FeatID, Formula, MSI, NumLabeledC, IonMode, ArgGroup, Strain, TimePoint, Time, Name)]
mean_dat_sub <- mean_dat[ArgGroup == "HR" & Strain == "2243" & FeatID %in% most_abund_iso[V1 != 0 & grepl("Pos$", CompID)][order(V2, decreasing = T)][1:3, paste0(CompID, "_ExRate", V1)]]
mean_dat_sub[,Name2:=factor(Name, levels = c("DL-Arginine", "L-(+)-Citrulline", "L(+)-Ornithine"))]
levels(mean_dat_sub$Name2) <- c("Arginine\n(6 labels)", "Citrulline\n(6 labels)", "Ornithine\n(5 labels)")
arg_plot <- ggplot(mean_dat_sub, aes(x = Time, y = V1, color = Name2)) + geom_point(size = 2) + geom_line(aes(group = FeatID)) + geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0) + scale_color_brewer(palette = "Set1", name = "") + ylab("Peak area in supernatant") + xlab("Time (hr)") + theme(axis.text = element_text(size = 10))
ggsave(arg_plot, file = paste0(outdir, "arg_labels.pdf"), width = 4, height = 2.6)
arg_plotfeat_data_wide <- dcast(mean_dat[Strain == "2243" & ArgGroup == "HR"], FeatID+CompID+Formula+Name+NumLabeledC~TimePoint, value.var = "V1")
max_vals <- hilic_extra[,.(MaxVal = max(AreaFrac)), by = FeatID]
feat_data_wide <- merge(feat_data_wide, max_vals, by = "FeatID", all.x = T)
feat_data_wide[,log2FCTP4_TP0:=log2((TP4+1000)/(TP0+1000))]
new_color_scale <- c("gray", brewer.pal(9, "YlGnBu"))
tot_vals <- feat_data_wide[,list(sum(TP0), sum(TP4)), by = CompID]
tot_vals[,log2FC:=log2((V2+1000)/(V1+1000))]
comp_order <- tot_vals[,log2FC[which.max(abs(log2FC))], by = CompID][order(V1), CompID]
top_comps2 <- feat_data_wide[(log2FCTP4_TP0 > 4|Name=="DL-Arginine") & MaxVal > 1e6 & NumLabeledC > 0, unique(CompID)]
top_comps2b <- top_comps2[!grepl("known", top_comps2)]
top_comps2b <- sort(top_comps2b)[c(1:5, 7:15, 17, 18, 20, 21, 22)]
top_comps3 <- intersect(top_comps2b, top_labeled_comps3)
comp_order2 <- feat_data_wide[CompID %in% top_comps2b & TP4 != 0,
list(max(NumLabeledC), TP4[which.max(NumLabeledC)], NumLabeledC[which.max(TP4)]), by = Name][order(V3, V1, V2, decreasing = T), Name]
comp_order2 <- comp_order2[c(2, 1, 3:19)]
mean_dat[,NumLC2:=paste0("M+", NumLabeledC)]
hilic_extra_labels <- ggplot(mean_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% top_comps3 & !grepl("known", Name)],
aes(x = factor(Time), y = V1, fill = factor(NumLC2))) + geom_bar(stat = "identity", color = "black") +
facet_wrap(~factor(Name, levels = comp_order2), ncol = 3, scales = "free_y") +
scale_fill_manual(values = new_color_scale, name = "Labeling Pattern") +
theme(axis.text = element_text(size = 7.5), strip.text = element_text(size = 9)) + xlab("Time (hr)") +
ylab("Mean Peak Area from Supernatant") + theme(strip.background = element_blank())
ggsave(hilic_extra_labels, file = paste0(outdir, "sil_arg_hilic_extra_barplots_v2.pdf"), width = 7, height = 6)
hilic_extra_labelsmost_abund_comps_pos <- mean_dat[ArgGroup == "HR" & Strain == "2243" & grepl("Pos$", CompID), sum(V1), by=CompID][order(V1, decreasing = T)][1:12, CompID]
tot_signal_plot_extra <- ggplot(mean_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% most_abund_comps_pos &
NumLC2 %in% c("M+0", "M+5", "M+6")], aes(x = factor(Time), fill = factor(CompID, levels = most_abund_comps_pos), y = V1)) +
geom_bar(position = "stack", stat = "identity", color = "black") + facet_wrap(~NumLC2, nrow = 1) +
scale_fill_manual(values = brewer.pal(12, "Set3"), name = "Compound ID") + ylab("Mean Peak Area") + xlab("Time (hr)") +
theme(legend.position = "bottom", strip.text = element_text(size = 12)) + guides(fill = guide_legend(ncol = 2))#+ guides(fill = "none")
ggsave(tot_signal_plot_extra, file = paste0(outdir, "totalSignalExtracellular.pdf"), width = 7, height = 5)
tot_signal_plot_extra############# Supp table
tab_save <- mean_dat[(CompID %in% intersect(top_comps2, top_labeled_comps3)) & Strain != "c" & ArgGroup == "HR"][V1 != 0]
setnames(tab_save, c("V1", "V2"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, ArgGroup)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]
tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)] & Time == 56], CompID+Name+MSI+Formula+Strain ~ paste0("M+", NumLabeledC), value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[1])
})]
tab_save[,RT:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(foo[2])
})]
tab_save[,MSI:=gsub("MSI_", "", str_extract(MSI, "MSI_[0-9]"))]
tab_save[,CompID:=sapply(CompID, function(x){
foo <- strsplit(x, split = "_")[[1]]
return(paste(foo[c(1,2,3,5)], collapse = "_"))
})]
tab_save <- tab_save[,c("CompID", "Name", "MSI", "MolWt", "Formula", "RT", sort(names(tab_save)[6:44])), with=F]
fwrite(tab_save[order(MolWt)], file = paste0(outdir, "extracellular_arg_labeling_summary.tsv"), sep = "\t")